diff --git a/README.md b/README.md index 39de76d..1a69944 100644 --- a/README.md +++ b/README.md @@ -1,123 +1,272 @@ # gemini-php -PHP 8 Library for [Gemini Protocol](https://geminiprotocol.net) - -_For optimization reasons, some experimental features like `Dokuwiki` and `GTK3/Pango` was dropped from `1.0.0` release, but available in [previous versions](https://github.com/YGGverse/gemini-php/releases/tag/0.10.1). `Gemtext` component re-implemented as separated library (see [Extras](#extras))_ - -## Extras - -* [gemtext-php](https://github.com/YGGverse/gemtext-php) - Object-oriented PHP 8 library for Gemini / Gemtext operations +PHP 8 Library for Gemini Protocol ## Usage ``` -composer require yggverse/gemini +composer require yggverse/gemini:dev-main ``` -## Client +## DokuWiki -PHP interface for Gemini protocol queries by TLS socket connection +The toolkit provides a DokuWiki API for Gemini. -### Request +It allows you to easily deploy new apps or mirror an existing website -``` php -$request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org:1965/index.gmi' +### Examples + +* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol + +### Reader + +Read DokuWiki and convert to Gemini + +``` +$reader = new \Yggverse\Gemini\Dokuwiki\Reader( + // optional regex rule set array ); ``` -**Resolved request (SNI)** +#### Reader::getRules +#### Reader::setRules +#### Reader::getRule +#### Reader::setRule -For direct connection provide resolved IP as the second argument +Get or change an existing regex rule (or skip this step to use the built-in set) -``` php -$request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org:1965/index.gmi' // target URL - '68.133.1.71' // resolved IP, skip to use system-wide resolver +``` +echo $reader->setRule( + '/subject/ui', + 'replacement' ); ``` -Alternatively, use `setResolvedHost` method of `Request` object before `getResponse` +#### Reader::getMacroses +#### 
Reader::setMacroses +#### Reader::getMacros +#### Reader::setMacros -#### Request::setResolvedHost - -``` php -$request->setResolvedHost( - '68.133.1.71' -) ``` - -* to resolve network address with PHP, take a look on the [net-php](https://github.com/YGGverse/net-php) library! - -#### Request::getResolvedHost - -Get resolved host back - -#### Request::setHost -#### Request::getHost -#### Request::setPort -#### Request::getPort -#### Request::setPath -#### Request::getPath -#### Request::setQuery -#### Request::getQuery -#### Request::getResponse - -Execute requested URL and return raw response - -``` php -var_dump( - $request->getResponse() +echo $reader->setMacros( + '~my-macros-key~', + '~my-macros-value~', ); ``` -#### Request::getOptions -#### Request::setOptions +#### Reader::toGemini -``` php -$request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org', - '68.133.1.71' // make direct request to the resolved host -); +Convert DokuWiki text to Gemini markup -$request->setOptions( - [ - 'ssl' => - [ - 'peer_name' => 'yggverse.cities.yesterweb.org', // SNI - 'verify_peer' => false, - 'verify_peer_name' => false - ] - ] +As a wiki usually has a lot of inline links, to keep the converted document readable this method does not turn links into separate `=>` link lines, but keeps them inline as `Name ( URL )`. This works well together with the `Reader::getLinks` method, which can be used, for example, to append all of those links to the document footer. + +If you don't like this implementation, feel free to change it with the `Reader::setRule` method! 
+ +``` +echo $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) ); ``` -### Response +#### Reader::getH1 -This class provides additional features for the raw response operations +Get document title -``` php -$response = new \Yggverse\Gemini\Client\Response( - $request->getResponse() +``` +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getH1( + $gemini ); ``` -#### Response::setCode -#### Response::getCode -#### Response::setMeta -#### Response::getMeta -#### Response::setBody -#### Response::getBody +#### Reader::getLinks -``` php -var_dump( - $response->getBody() +Get document links + +``` +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getLinks( + $gemini ); ``` -## Integrations +### Filesystem -* [gemini-dl](https://github.com/YGGverse/gemini-dl) - CLI Batch downloader for Gemini Protocol -* [Yo!](https://github.com/YGGverse/Yo/tree/gemini) - Crawler for different networks -* [Yoda](https://github.com/YGGverse/Yoda) - PHP-GTK browser for Gemini Protocol -* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol \ No newline at end of file +Provides methods for simple and secure interaction with DokuWiki file storage + +``` +$filesystem = new \Yggverse\Gemini\Dokuwiki\Filesystem( + '/host/data' // storage location +); +``` + +#### Filesystem::getList + +Return simple array of all files in storage + +``` +var_dump ( + $filesystem->getList( + 'hello:world' + ) +); +``` + +#### Filesystem::getTree + +Return all files under the storage folder in tree format + +``` +var_dump ( + $filesystem->getTree( + 'hello:world' + ) +); +``` + +#### Filesystem::getPagePathsByPath + +Return pages under the given data directory + +``` +var_dump ( + $filesystem->getPagePathsByPath( + // absolute path to target data directory (e.g. 
Filesystem::getDirectoryPathByUri) + ) +); +``` + +#### Filesystem::getDirectoryPathByUri +#### Filesystem::getPagePathByUri + +Return the absolute path to the stored page file + +``` +var_dump ( + $filesystem->getPagePathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getDirectoryUriByPath +#### Filesystem::getPageUriByPath + +Return the page URI in `dokuwiki:format` + +``` +var_dump ( + $filesystem->getPageUriByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getMediaPathByUri + +Return the absolute path to the stored media file + +``` +var_dump ( + $filesystem->getMediaPathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getMimeByPath + +Return the file MIME type if the path matches a storage item + +``` +var_dump ( + $filesystem->getMimeByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getDataByPath + +Return the file content if the path matches a storage item + +``` +var_dump ( + $filesystem->getDataByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::isPath + +Check that the path exists and matches a storage item + +``` +var_dump ( + $filesystem->isPath( + '/full/path/to/page.txt' + ) +); +``` + +### Helper + +Useful methods to keep the controller codebase small + +``` +$helper = new \Yggverse\Gemini\Dokuwiki\Helper( + new \Yggverse\Gemini\Dokuwiki\Filesystem(), + new \Yggverse\Gemini\Dokuwiki\Reader() +); +``` + +#### Helper::getChildrenSectionLinksByUri + +Return a simple array of child section links in Gemini format + +``` +var_dump ( + $helper->getChildrenSectionLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getChildrenPageLinksByUri + +Return a simple array of child page links in Gemini format + +``` +var_dump ( + $helper->getChildrenPageLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getPageLinkByPath + +Return the page link (which contains the document name) in Gemini format + +``` +var_dump ( + $helper->getPageLinkByPath( + $filesystem->getPagePathByUri( + 'hello:world' + ) + ) +); +``` \ No newline at end of file diff --git a/composer.json 
b/composer.json index 33c1ae1..efc7aa6 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "yggverse/gemini", "description": "PHP 8 Library for Gemini Protocol", - "keywords": [ "yggverse", "gemini", "gemini-protocol", "client", "request", "response" ], + "keywords": [ "yggverse", "gemini", "wiki", "dokuwiki", "markdown" ], "homepage": "https://github.com/yggverse/gemini-php", "type": "library", "license": "MIT", @@ -10,5 +10,7 @@ "Yggverse\\Gemini\\": "src/" } }, - "require": {} + "require": { + "dekor/php-array-table": "^2.0" + } } diff --git a/src/Client/Request.php b/src/Client/Request.php deleted file mode 100644 index 183ce56..0000000 --- a/src/Client/Request.php +++ /dev/null @@ -1,216 +0,0 @@ - - [ - 'allow_self_signed' => true, - 'disable_compression' => true, - 'verify_peer_name' => false, - 'verify_peer' => false - ] - ]; - - public function __construct(string $url, ?string $ip = null) - { - if ($host = parse_url($url, PHP_URL_HOST)) - { - $this->setHost( - $host - ); - } - - else - { - throw new \Exception( - _('Host required') - ); - } - - if ($port = parse_url($url, PHP_URL_PORT)) - { - $this->setPort( - $port - ); - } - - else - { - $this->setPort( - 1965 - ); - } - - if ($path = parse_url($url, PHP_URL_PATH)) - { - $this->setPath( - $path - ); - } - - else - { - $this->setPath( - '' - ); - } - - if ($query = parse_url($url, PHP_URL_QUERY)) - { - $this->setQuery( - $query - ); - } - - else - { - $this->setQuery( - '' - ); - } - - if ($ip && false !== filter_var($ip, FILTER_VALIDATE_IP)) - { - $this->setResolvedHost( - $ip - ); - } - } - - public function setOptions(array $value): void - { - $this->_options = $value; - } - - public function getOptions(): array - { - return $this->_options; - } - - public function setHost(string $value): void - { - $this->_host = $value; - } - - public function getHost(): string - { - return $this->_host; - } - - public function setPort(int $value): void - { - $this->_port = $value; - } - - public 
function getPort(): int - { - return $this->_port; - } - - public function setPath(string $value): void - { - $this->_path = $value; - } - - public function getPath(): string - { - return $this->_path; - } - - public function setQuery(string $value): void - { - $this->_query = $value; - } - - public function getQuery(): string - { - return $this->_query; - } - - public function setResolvedHost(?string $value): void - { - $this->_ip = $value; - } - - public function getResolvedHost(): ?string - { - return $this->_ip; - } - - public function getResponse( - int $timeout = 30, // socket timeout, useful for offline resources - ?int $limit = null, // content length, null for unlimited - ?int &$length = 0, // initial response length, do not change without special needs - ?int &$code = null, // error code for debug - ?string &$message = null, // error message for debug - string &$response = '' // response init, also returning by this method - ): ?string - { - $connection = stream_socket_client( - sprintf( - 'tls://%s:%d', - $this->_ip ? $this->_ip : $this->_host, - $this->_port - ), - $code, - $message, - $timeout, - STREAM_CLIENT_CONNECT, - stream_context_create( - $this->_options - ) - ); - - if (!is_resource($connection)) - { - return null; - } - - fwrite( - $connection, - sprintf( - "gemini://%s:%d%s%s\r\n", - $this->_host, - $this->_port, - $this->_path, - $this->_query ? 
sprintf( - '?%s', - $this->_query - ) : null - ) - ); - - while ($part = fgets($connection)) - { - $length = $length + mb_strlen( - $part - ); - - if ($limit && $length > $limit) - { - break; - } - - $response .= $part; - } - - fclose( - $connection - ); - - return $response; - } -} \ No newline at end of file diff --git a/src/Client/Response.php b/src/Client/Response.php deleted file mode 100644 index aabc7cf..0000000 --- a/src/Client/Response.php +++ /dev/null @@ -1,84 +0,0 @@ -\d{2})(?.*)$/m', - $data, - $match - ); - - if (isset($match['code'])) - { - $code = (int) $match['code']; - - if ($code >= 10 && $code <= 69) - { - $this->setCode( - $code - ); - } - } - - if (isset($match['meta']) && mb_strlen($match['meta']) <= 1024) - { - $this->setMeta( - trim( - (string) $match['meta'] - ) - ); - } - - if ($body = substr($data, strpos($data, chr(10)) + 1)) - { - $this->setBody( - (string) $body - ); - } - } - } - - public function setCode(?int $value): void - { - $this->_code = $value; - } - - public function getCode(): ?int - { - return $this->_code; - } - - public function setMeta(?string $value): void - { - $this->_meta = $value; - } - - public function getMeta(): ?string - { - return $this->_meta; - } - - public function setBody(?string $value): void - { - $this->_body = $value; - } - - public function getBody(): ?string - { - return $this->_body; - } -} \ No newline at end of file diff --git a/src/Dokuwiki/Filesystem.php b/src/Dokuwiki/Filesystem.php new file mode 100644 index 0000000..33b79c2 --- /dev/null +++ b/src/Dokuwiki/Filesystem.php @@ -0,0 +1,277 @@ +_path = rtrim( + $path, + '/' + ); + + $this->_index( + $this->_path + ); + } + + public function getTree(): array + { + return $this->_tree; + } + + public function getList(): array + { + return $this->_list; + } + + public function getPagePathsByPath(string $path): ?array + { + if (isset($this->_tree[$path])) + { + return $this->_tree[$path]; + } + + return null; + } + + public function 
getPagePathByUri(string $uri): ?string + { + $path = sprintf( + '%s/pages/%s.txt', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getPageUriByPath(string $path): ?string + { + if (!$this->isPath($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages/', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/', + '.txt' + ], + [ + ':', + null + ], + $path + ); + + return $path; + } + + public function getDirectoryPathByUri(string $uri = ''): ?string + { + $path = rtrim( + sprintf( + '%s/pages/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ), + '/' + ); + + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + return $path; + } + + public function getDirectoryUriByPath(string $path): ?string + { + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/' + ], + [ + ':' + ], + $path + ); + + return $path; + } + + public function getMediaPathByUri(string $uri): ?string + { + $path = sprintf( + '%s/media/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getMimeByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($mime = mime_content_type($path)) + { + return $mime; + } + } + + return null; + } + + public function getDataByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($data = file_get_contents($path)) + { + return $data; + } + } + + return null; + } + + public 
function isPath(?string $path): bool + { + if (in_array($path, $this->_list) && is_file($path) && is_readable($path)) + { + return true; + } + + return false; + } + + private function _index( // recursively collect files under $path into $this->_tree and $this->_list + string $path, + ?array $blacklist = ['sidebar.txt', '__template.txt'] + ): void + { + foreach ((scandir($path) ?: []) as $file) // scandir() returns false on failure; treat that as an empty directory + { + if (str_starts_with($file, '.')) + { + continue; + } + + if (is_link($path . '/' . $file)) // check the entry inside $path, not a name relative to the working directory + { + continue; + } + + if (in_array($file, (array) $blacklist, true)) // $blacklist is declared nullable + { + continue; + } + + $file = sprintf( + '%s/%s', + $path, + $file + ); + + switch (true) + { + case is_dir($file): + + if (!isset($this->_tree[$path])) + { + $this->_tree[$path] = []; + } + + $this->_index($file, $blacklist); // keep the caller's blacklist while descending + + break; + + case is_file($file): + + $this->_tree[$path][] = $file; + + $this->_list[] = $file; + + break; + } + } + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Helper.php b/src/Dokuwiki/Helper.php new file mode 100644 index 0000000..232b10f --- /dev/null +++ b/src/Dokuwiki/Helper.php @@ -0,0 +1,154 @@ +_filesystem = $filesystem; + $this->_reader = $reader; + } + + public function getChildrenSectionLinksByUri(?string $uri = ''): array + { + $sections = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getTree() as $path => $files) + { + if (str_starts_with($path, $directory . '/')) // real descendants only: a sibling such as "foo-bar" must not match "foo" + { + // Init link name + $h1 = null; + + // Init this directory URI + $thisUri = $this->_filesystem->getDirectoryUriByPath( + $path + ); + + // Skip sections deeper this level + if (substr_count($thisUri, ':') > ($uri ? substr_count($uri, ':') + 1 : 0)) + { + continue; + } + + // Get section names + $segments = []; + + foreach ((array) explode(':', $thisUri) as $segment) + { + $segments[] = $segment; + + // Find section index if exists + if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments) . ':' . 
$segment)) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Find section page if exists + else if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments))) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Reset title of undefined segment + else + { + $h1 = null; + } + } + + // Register section link + $sections[] = sprintf( + '=> /%s %s', + $thisUri, + $h1 + ); + } + } + } + + // Keep unique + $sections = array_unique( + $sections + ); + + // Sort asc + sort( + $sections + ); + + return $sections; + } + + public function getChildrenPageLinksByUri(?string $uri = ''): array + { + $pages = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getPagePathsByPath($directory) as $file) + { + if ($link = $this->getPageLinkByPath($file)) + { + $pages[] = $link; + } + } + } + + // Keep unique + $pages = array_unique( + $pages + ); + + // Sort asc + sort( + $pages + ); + + return $pages; + } + + public function getPageLinkByPath(string $path): ?string + { + if (in_array($path, $this->_filesystem->getList()) && is_file($path) && is_readable($path)) + { + return sprintf( + '=> /%s %s', + $this->_filesystem->getPageUriByPath( + $path + ), + $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $path + ) + ) + ) + ); + } + + return null; + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Reader.php b/src/Dokuwiki/Reader.php new file mode 100644 index 0000000..95accef --- /dev/null +++ b/src/Dokuwiki/Reader.php @@ -0,0 +1,412 @@ + null, + '~IPv6:open~' => '[', + '~IPv6:close~' => ']', + '~LINE:break~' => PHP_EOL + ]; + + private array $_rule = + [ + // Headers + '/^([\s]*)#([^#]+)/' => '$1#$2' . PHP_EOL, + '/^([\s]*)##([^#]+)/' => '$1##$2' . PHP_EOL, + '/^([\s]*)###([^#]+)/' => '$1###$2' . 
PHP_EOL, + '/^([\s]*)####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)#####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)######([^#]+)/' => '$1###$2' . PHP_EOL, + + '/^[\s]*[=]{6}([^=]+)[=]{6}/' => '# $1' . PHP_EOL, + '/^[\s]*[=]{5}([^=]+)[=]{5}/' => '## $1' . PHP_EOL, + '/^[\s]*[=]{4}([^=]+)[=]{4}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{3}([^=]+)[=]{3}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{2}([^=]+)[=]{2}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{1}([^=]+)[=]{1}/' => '### $1' . PHP_EOL, + + // Tags + '/\*\*/' => '', + '/\'\'/' => '', + '/\%\%/' => '', + '/(? '', + + // Remove extra spaces + '/(\s)\s+/' => '$1', + + // Links + + /// Detect IPv6 (used as no idea how to resolve square quotes in rules below) + '/\[\[([^\[]+)\[([A-f:0-9]*)\]([^\]]+)\]\]/' => '$1~IPv6:open~$2~IPv6:close~$3', + + /// Remove extra chars + '/\[\[\s*\:?([^\|]+)\s*\|\s*([^\]]+)\s*\]\]/' => '[[$1|$2]]', + '/\[\[\s*\:?([^\]]+)\s*\]\]/' => '[[$1]]', + + '/\{\{\s*\:?([^\|]+)\s*\|\s*([^\}]+)\s*\}\}/' => '{{$1|$2}}', + '/\{\{\s*\:?([^\}]+)\s*\}\}/' => '{{$1}}', + + /// Wikipedia + '/\[\[wp([A-z]{2,})>([^\|]+)\|([^\]]+)\]\]/ui' => '$3 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\|]+)\|([^\]]+)\]\]/i' => '$2 ( https://en.wikipedia.org/wiki/$1 )', + '/\[\[wp([A-z]{2,})>([^\]]+)\]\]/i' => '$2 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\]]+)\]\]/i' => '$1 ( https://en.wikipedia.org/wiki/$1 )', + + /// Dokuwiki + '/\[\[doku>([^\|]+)\|([^\]]+)\]\]/i' => '$2( https://www.dokuwiki.org/$1 )', + '/\[\[doku>([^\]]+)\]\]/i' => '$1( https://www.dokuwiki.org/$1 )', + + /// Index + /// Useful with src/Dokuwiki/Helper.php + '/\{\{indexmenu>:([^\}]+)\}\}/i' => '', + '/\{\{indexmenu_n>[\d]+\}\}/i' => '', + + // Related + '/\[\[this>([^\|]+)\|([^\]]+)\]\]/i' => '$2', + + /// Relative + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\|]+)\|([^\]]+)\]\]/i' => ' $2$3 ( ~URL:base~$1 )', + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\]]+)\]\]/i' => ' $2 ( ~URL:base~$1 )', + + /// Absolute + 
'/\[\[(https?:)([^\|]+)\|([^\]]+)\]\]/i' => '$3 ( $1$2 )', + '/\[\[(https?:)([^\]]+)\]\]/i' => '$1$2', // @TODO + + /// Media + '/\{\{(?!https?:)([^\|]+)\|([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, + '/\{\{(?!https?:)([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, + + // List + '/^[\s]?-/' => '* ', + '/^[\s]+\*/' => '*', + + // Separators + '/[\\\]{2}/' => '~LINE:break~', + + // Plugins + '/~~DISCUSSION~~/' => '', // @TODO + '/~~INFO:syntaxplugins~~/' => '', // @TODO + + // Final corrections + '/[\n\r]+[.,;:]+/' => PHP_EOL + ]; + + public function __construct(?array $rules = null) + { + if ($rules) + { + $this->_rule = $rules; + } + } + + // Macros operations + public function getMacroses(): array + { + return $this->_macros; + } + + public function setMacroses(array $macros) + { + $this->_macros = $macros; + } + + public function getMacros(string $key, string $value): ?string + { + return $this->_macros[$key] ?? null; // $value is unused, kept for signature compatibility + } + + public function setMacros(string $key, ?string $value): void + { + if (null !== $value) // only null removes the entry; '' and '0' are valid values + { + $this->_macros[$key] = $value; + } + + else + { + unset( + $this->_macros[$key] + ); + } + } + + // Rule operations + public function getRules(): array + { + return $this->_rule; + } + + public function setRules(array $rules) + { + $this->_rule = $rules; + } + + public function getRule(string $key, string $value): ?string + { + return $this->_rule[$key] ?? null; // $value is unused, kept for signature compatibility 
+ } + + public function setRule(string $key, ?string $value): void + { + if (null !== $value) // only null removes the rule; '' is a valid replacement (delete the match) + { + $this->_rule[$key] = $value; + } + + else + { + unset( + $this->_rule[$key] + ); + } + } + + // Convert DokuWiki text to Gemini + public function toGemini(?string $data, ?array &$lines = []): ?string + { + if (empty($data)) + { + return null; + } + + $raw = false; + + $lines = []; + + foreach ((array) explode(PHP_EOL, $data) as $line) + { + // Skip any formatting in lines between code tag + if (!$raw && preg_match('/<(code|file)([^>]*)>/i', $line, $matches)) + { + // Prepend tag meta or filename as plain description + if (!empty($matches[0])) + { + $lines[] = preg_replace( + '/<(code|file)[\s-]*([^>]*)>/i', + '$2', + $matches[0] + ); + } + + $lines[] = '```'; + $lines[] = preg_replace( + '/<\/?(code|file)[^>]*>/i', + '', + $line + ); + + $raw = true; + + // Make sure inline tag closed + if (preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = '```'; + + $raw = false; + + continue; + } + + continue; + } + + if ($raw && preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = preg_replace( + '/<\/(code|file)>/i', + '', + $line + ); + + $lines[] = '```'; + + $raw = false; + + continue; + } + + if ($raw) + { + $lines[] = preg_replace( + '/^```/', + ' ```', + $line + ); + + continue; + } + + // Apply config + $lines[] = preg_replace( + array_keys( + $this->_rule + ), + array_values( + $this->_rule + ), + strip_tags( + $line + ) + ); + } + + // ASCII table + $table = false; + + $rows = []; + + $th = []; + + foreach ($lines as $index => $line) + { + // Strip line breaks + $line = str_replace( + '~LINE:break~', + ' ', + $line + ); + + // Header + if (!$table && preg_match_all('/\^([^\^]+)/', $line, $matches)) + { + if (!empty($matches[1])) + { + $table = true; + + $rows = []; + + $th = []; + + foreach ($matches[1] as $value) + { + $th[] = trim( + $value + ); + } + + unset( + $lines[$index] + ); + + continue; + } + } + + // Body + if ($table) + { + $table = false; 
+ + if (preg_match(sprintf('/%s\|/', str_repeat('\|(.*)', count($th))), $line, $matches)) + { + if (count($matches) == count($th) + 1) + { + $table = true; + + $row = []; + foreach ($th as $offset => $column) + { + $row[$column] = trim( + $matches[$offset + 1] + ); + } + + $rows[] = $row; + + unset( + $lines[$index] + ); + } + } + + if (!$table && $rows) + { + $builder = new ArrayToTextTable( + $rows + ); + + $lines[$index] = '```' . PHP_EOL . $builder->render() . PHP_EOL . '```'; + } + } + } + + // Merge lines + return preg_replace( + '/[\n\r]{2,}/', + PHP_EOL . PHP_EOL, + str_replace( + array_keys( + $this->_macros + ), + array_values( + $this->_macros + ), + implode( + PHP_EOL, + $lines + ) + ) + ); + } + + public function getH1(?string $gemini, ?string $regex = '/^[\s]?#([^#]+)/'): ?string + { + foreach ((array) explode(PHP_EOL, (string) $gemini) as $line) + { + preg_match( + $regex, + $line, + $matches + ); + + if (!empty($matches[1])) + { + return trim( + $matches[1] + ); + + break; + } + } + + return null; + } + + public function getLinks(?string $gemini, ?string $regex = '/(https?|gemini):\/\/\S+/'): array + { + $links = []; + + if (empty($gemini)) + { + return $links; + } + + preg_match_all( + $regex, + $gemini, + $matches + ); + + if (!empty($matches[0])) + { + foreach ((array) $matches[0] as $link) + { + $links[] = trim( + $link + ); + } + } + + return array_unique( + $links + ); + } +} \ No newline at end of file