diff --git a/README.md b/README.md index 39de76d..6bfa299 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,6 @@ PHP 8 Library for [Gemini Protocol](https://geminiprotocol.net) -_For optimization reasons, some experimental features like `Dokuwiki` and `GTK3/Pango` was dropped from `1.0.0` release, but available in [previous versions](https://github.com/YGGverse/gemini-php/releases/tag/0.10.1). `Gemtext` component re-implemented as separated library (see [Extras](#extras))_ - -## Extras - -* [gemtext-php](https://github.com/YGGverse/gemtext-php) - Object-oriented PHP 8 library for Gemini / Gemtext operations - ## Usage ``` @@ -115,9 +109,397 @@ var_dump( ); ``` +## Gemtext + +Object-oriented API for Gemtext + +**Deprecated and will be removed in future releases! Use [gemtext-php](https://github.com/YGGverse/gemtext-php) instead.** + +### Body + +Basic methods to work with `text/gemini` documents + +``` php +$body = new \Yggverse\Gemini\Gemtext\Body( + $response->getBody() // gemtext body from client response or .gmi file content +); +``` + +#### Body::getLines +#### Body::getLine +#### Body::getH1 +#### Body::getH2 +#### Body::getH3 +#### Body::getQuote +#### Body::getCode +#### Body::getLinks + +``` php +var_dump( + $body->getLinks() // returns array of links (with line number in key) +); +``` + +#### Body::findLinks + +Find context links by protocol as argument, `gemini` by default + +``` php +var_dump( + $body->findLinks('http') // returns array of http links only (with line number in key) +); +``` + +#### Body::skipTags + +Strip gemini tags from Gemini document + +``` php +var_dump( + $body->skipTags() // strip all tags +); + +var_dump( + $body->skipTags( + [ // 1- and 2- level headers only + "##", + "###" + ] + ) +); +``` + +### Link + +Inline links parser. 
+ +Allows to extract address, date with timestamp and alt text from link line given + +``` php +foreach ($body->getLinks() as $line) +{ + $link = new \Yggverse\Gemini\Gemtext\Link( + $line + ); + + var_dump( + $link->getAddress() + ); + + var_dump( + $link->getAlt() + ); +} +``` + +#### Link::getAddress +#### Link::getDate + +This method also validates time format and returns the unix timestamp as linked argument + +``` php +var_dump( + $link->getDate( + $timestamp // get unix time from this variable + ) +); + +var_dump( + $timestamp +); +``` + +#### Link::getAlt + +## GTK3 + +### Pango + +Converter to GTK3-compatible Pango format + +#### Pango::fromGemtext + +``` php +$pango = \Yggverse\Gemini\Pango::fromGemtext( + $gemtext +); +``` + +#### Pango::fromGemtextBody + +``` php +$pango = \Yggverse\Gemini\Pango::fromGemtextBody( + new \Yggverse\Gemini\Gemtext\Body( + $gemtext + ) +); +``` + +## DokuWiki + +Toolkit provides DokuWiki API for Gemini. + +Allows to simple deploy new apps or make existing website mirror + +### Examples + +* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol + +### Reader + +Read DokuWiki and convert to Gemini + +``` php +$reader = new \Yggverse\Gemini\Dokuwiki\Reader( + // optional regex rule set array +); +``` + +#### Reader::getRules +#### Reader::setRules +#### Reader::getRule +#### Reader::setRule + +Get or change existing regex rule (or just skip by using build-in set) + +``` php +echo $reader->setRule( + '/subject/ui', + 'replacement' +); +``` + +#### Reader::getMacroses +#### Reader::setMacroses +#### Reader::getMacros +#### Reader::setMacros + +``` php +echo $reader->setMacros( + '~my-macros-key~', + '~my-macros-value~', +); +``` + +#### Reader::toGemini + +Convert DokuWiki text to Gemini markup + +As wiki has lot of inline links, to make converted document well-readable, this method does not replace links with new line `=>` macros, but uses inline context: `Name ( URL )`. 
This model is useful together with the `Reader::getLinks` method, which can be used, for example, to append all those related links to the document footer. + +If you don't like this implementation, feel free to change it using the `Reader::setRule` method! + +``` php +echo $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); +``` + +#### Reader::getH1 + +Get document title + +``` php +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getH1( + $gemini +); +``` + +#### Reader::getLinks + +Get document links + +``` php +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getLinks( + $gemini +); +``` + +### Filesystem + +Provides methods for simple and secure interaction with DokuWiki file storage + +``` php +$filesystem = new \Yggverse\Gemini\Dokuwiki\Filesystem( + '/host/data' // storage location +); +``` + +#### Filesystem::getList + +Return simple array of all files in storage + +``` php +var_dump ( + $filesystem->getList( + 'hello:world' + ) +); +``` + +#### Filesystem::getTree + +Return all files under the storage folder in tree format + +``` php +var_dump ( + $filesystem->getTree( + 'hello:world' + ) +); +``` + +#### Filesystem::getPagePathsByPath + +Return pages under the given data directory + +``` php +var_dump ( + $filesystem->getPagePathsByPath( + // absolute path to target data directory (e.g. 
Filesystem::getDirectoryPathByUri) + ) +); +``` + +#### Filesystem::getDirectoryPathByUri +#### Filesystem::getPagePathByUri + +Return absolute path to stored page file + +``` php +var_dump ( + $filesystem->getPagePathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getDirectoryUriByPath +#### Filesystem::getPageUriByPath + +Return page URI in `dokuwiki:format` + +``` php +var_dump ( + $filesystem->getPageUriByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getMediaPathByUri + +Return absolute path to stored media file + +``` php +var_dump ( + $filesystem->getMediaPathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getMimeByPath + +Return file MIME if path match storage item + +``` php +var_dump ( + $filesystem->getMimeByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getDataByPath + +Return file content if path match storage item + +``` php +var_dump ( + $filesystem->getDataByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::isPath + +Check path exist and match storage item + +``` php +var_dump ( + $filesystem->isPath( + '/full/path/to/page.txt' + ) +); +``` + +### Helper + +Useful methods to minify controller codebase + +``` php +$helper = new \Yggverse\Gemini\Dokuwiki\Helper( + new \Yggverse\Gemini\Dokuwiki\Filesystem(), + new \Yggverse\Gemini\Dokuwiki\Reader() +); +``` + +#### Helper::getChildrenSectionLinksByUri + +Return simple array of children section links in Gemini format + +``` php +var_dump ( + $helper->getChildrenSectionLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getChildrenPageLinksByUri + +Return simple array of children page links in Gemini format + +``` php +var_dump ( + $helper->getChildrenPageLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getPageLinkByPath + +Return page link (that contain document name) in Gemini format + +``` php +var_dump ( + $helper->getPageLinkByPath( + $filesystem->getPagePathByUri( + 'hello:world' + ) + ) +); +``` + ## Integrations -* 
[gemini-dl](https://github.com/YGGverse/gemini-dl) - CLI Batch downloader for Gemini Protocol -* [Yo!](https://github.com/YGGverse/Yo/tree/gemini) - Crawler for different networks -* [Yoda](https://github.com/YGGverse/Yoda) - PHP-GTK browser for Gemini Protocol -* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol \ No newline at end of file +* [β-Doku is DokuWiki Satellite for Gemini Protocol](https://github.com/YGGverse/bdoku) +* [Yo! Crawler for different networks](https://github.com/YGGverse/Yo/tree/gemini) +* [Yoda - PHP-GTK browser for Gemini Protocol](https://github.com/YGGverse/Yoda) \ No newline at end of file diff --git a/composer.json b/composer.json index 33c1ae1..efc7aa6 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "yggverse/gemini", "description": "PHP 8 Library for Gemini Protocol", - "keywords": [ "yggverse", "gemini", "gemini-protocol", "client", "request", "response" ], + "keywords": [ "yggverse", "gemini", "wiki", "dokuwiki", "markdown" ], "homepage": "https://github.com/yggverse/gemini-php", "type": "library", "license": "MIT", @@ -10,5 +10,7 @@ "Yggverse\\Gemini\\": "src/" } }, - "require": {} + "require": { + "dekor/php-array-table": "^2.0" + } } diff --git a/src/Client/Request.php b/src/Client/Request.php index 183ce56..560a1fd 100644 --- a/src/Client/Request.php +++ b/src/Client/Request.php @@ -17,10 +17,8 @@ class Request [ 'ssl' => [ - 'allow_self_signed' => true, - 'disable_compression' => true, - 'verify_peer_name' => false, - 'verify_peer' => false + 'verify_peer' => false, + 'verify_peer_name' => false ] ]; @@ -186,10 +184,7 @@ class Request $this->_host, $this->_port, $this->_path, - $this->_query ? 
sprintf( - '?%s', - $this->_query - ) : null + $this->_query ) ); diff --git a/src/Dokuwiki/Filesystem.php b/src/Dokuwiki/Filesystem.php new file mode 100644 index 0000000..33b79c2 --- /dev/null +++ b/src/Dokuwiki/Filesystem.php @@ -0,0 +1,277 @@ +_path = rtrim( + $path, + '/' + ); + + $this->_index( + $this->_path + ); + } + + public function getTree(): array + { + return $this->_tree; + } + + public function getList(): array + { + return $this->_list; + } + + public function getPagePathsByPath(string $path): ?array + { + if (isset($this->_tree[$path])) + { + return $this->_tree[$path]; + } + + return null; + } + + public function getPagePathByUri(string $uri): ?string + { + $path = sprintf( + '%s/pages/%s.txt', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getPageUriByPath(string $path): ?string + { + if (!$this->isPath($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages/', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/', + '.txt' + ], + [ + ':', + null + ], + $path + ); + + return $path; + } + + public function getDirectoryPathByUri(string $uri = ''): ?string + { + $path = rtrim( + sprintf( + '%s/pages/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ), + '/' + ); + + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + return $path; + } + + public function getDirectoryUriByPath(string $path): ?string + { + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/' + ], + [ + ':' + ], + $path + ); + + return $path; + } + + public 
function getMediaPathByUri(string $uri): ?string + { + $path = sprintf( + '%s/media/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getMimeByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($mime = mime_content_type($path)) + { + return $mime; + } + } + + return null; + } + + public function getDataByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($data = file_get_contents($path)) + { + return $data; + } + } + + return null; + } + + public function isPath(?string $path): bool + { + if (in_array($path, $this->_list) && is_file($path) && is_readable($path)) + { + return true; + } + + return false; + } + + private function _index( + string $path, + ?array $blacklist = ['sidebar.txt', '__template.txt'] + ): void + { + foreach ((array) scandir($path) as $file) + { + if (str_starts_with($file, '.')) + { + continue; + } + + if (is_link($file)) + { + continue; + } + + if (in_array($file, $blacklist)) + { + continue; + } + + $file = sprintf( + '%s/%s', + $path, + $file + ); + + switch (true) + { + case is_dir($file): + + if (!isset($this->_tree[$path])) + { + $this->_tree[$path] = []; + } + + $this->_index($file); + + break; + + case is_file($file): + + $this->_tree[$path][] = $file; + + $this->_list[] = $file; + + break; + } + } + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Helper.php b/src/Dokuwiki/Helper.php new file mode 100644 index 0000000..232b10f --- /dev/null +++ b/src/Dokuwiki/Helper.php @@ -0,0 +1,154 @@ +_filesystem = $filesystem; + $this->_reader = $reader; + } + + public function getChildrenSectionLinksByUri(?string $uri = ''): array + { + $sections = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getTree() as $path => $files) + { + if (str_starts_with($path, $directory) && $path != 
$directory) + { + // Init link name + $h1 = null; + + // Init this directory URI + $thisUri = $this->_filesystem->getDirectoryUriByPath( + $path + ); + + // Skip sections deeper this level + if (substr_count($thisUri, ':') > ($uri ? substr_count($uri, ':') + 1 : 0)) + { + continue; + } + + // Get section names + $segments = []; + + foreach ((array) explode(':', $thisUri) as $segment) + { + $segments[] = $segment; + + // Find section index if exists + if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments) . ':' . $segment)) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Find section page if exists + else if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments))) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Reset title of undefined segment + else + { + $h1 = null; + } + } + + // Register section link + $sections[] = sprintf( + '=> /%s %s', + $thisUri, + $h1 + ); + } + } + } + + // Keep unique + $sections = array_unique( + $sections + ); + + // Sort asc + sort( + $sections + ); + + return $sections; + } + + public function getChildrenPageLinksByUri(?string $uri = ''): array + { + $pages = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getPagePathsByPath($directory) as $file) + { + if ($link = $this->getPageLinkByPath($file)) + { + $pages[] = $link; + } + } + } + + // Keep unique + $pages = array_unique( + $pages + ); + + // Sort asc + sort( + $pages + ); + + return $pages; + } + + public function getPageLinkByPath(string $path): ?string + { + if (in_array($path, $this->_filesystem->getList()) && is_file($path) && is_readable($path)) + { + return sprintf( + '=> /%s %s', + $this->_filesystem->getPageUriByPath( + $path + ), + $this->_reader->getH1( + $this->_reader->toGemini( + 
$this->_filesystem->getDataByPath( + $path + ) + ) + ) + ); + } + + return null; + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Reader.php b/src/Dokuwiki/Reader.php new file mode 100644 index 0000000..95accef --- /dev/null +++ b/src/Dokuwiki/Reader.php @@ -0,0 +1,412 @@ + null, + '~IPv6:open~' => '[', + '~IPv6:close~' => ']', + '~LINE:break~' => PHP_EOL + ]; + + private array $_rule = + [ + // Headers + '/^([\s]*)#([^#]+)/' => '$1#$2' . PHP_EOL, + '/^([\s]*)##([^#]+)/' => '$1##$2' . PHP_EOL, + '/^([\s]*)###([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)#####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)######([^#]+)/' => '$1###$2' . PHP_EOL, + + '/^[\s]*[=]{6}([^=]+)[=]{6}/' => '# $1' . PHP_EOL, + '/^[\s]*[=]{5}([^=]+)[=]{5}/' => '## $1' . PHP_EOL, + '/^[\s]*[=]{4}([^=]+)[=]{4}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{3}([^=]+)[=]{3}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{2}([^=]+)[=]{2}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{1}([^=]+)[=]{1}/' => '### $1' . PHP_EOL, + + // Tags + '/\*\*/' => '', + '/\'\'/' => '', + '/\%\%/' => '', + '/(? 
'', + + // Remove extra spaces + '/(\s)\s+/' => '$1', + + // Links + + /// Detect IPv6 (used as no idea how to resolve square quotes in rules below) + '/\[\[([^\[]+)\[([A-f:0-9]*)\]([^\]]+)\]\]/' => '$1~IPv6:open~$2~IPv6:close~$3', + + /// Remove extra chars + '/\[\[\s*\:?([^\|]+)\s*\|\s*([^\]]+)\s*\]\]/' => '[[$1|$2]]', + '/\[\[\s*\:?([^\]]+)\s*\]\]/' => '[[$1]]', + + '/\{\{\s*\:?([^\|]+)\s*\|\s*([^\}]+)\s*\}\}/' => '{{$1|$2}}', + '/\{\{\s*\:?([^\}]+)\s*\}\}/' => '{{$1}}', + + /// Wikipedia + '/\[\[wp([A-z]{2,})>([^\|]+)\|([^\]]+)\]\]/ui' => '$3 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\|]+)\|([^\]]+)\]\]/i' => '$2 ( https://en.wikipedia.org/wiki/$1 )', + '/\[\[wp([A-z]{2,})>([^\]]+)\]\]/i' => '$2 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\]]+)\]\]/i' => '$1 ( https://en.wikipedia.org/wiki/$1 )', + + /// Dokuwiki + '/\[\[doku>([^\|]+)\|([^\]]+)\]\]/i' => '$2( https://www.dokuwiki.org/$1 )', + '/\[\[doku>([^\]]+)\]\]/i' => '$1( https://www.dokuwiki.org/$1 )', + + /// Index + /// Useful with src/Dokuwiki/Helper.php + '/\{\{indexmenu>:([^\}]+)\}\}/i' => '', + '/\{\{indexmenu_n>[\d]+\}\}/i' => '', + + // Related + '/\[\[this>([^\|]+)\|([^\]]+)\]\]/i' => '$2', + + /// Relative + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\|]+)\|([^\]]+)\]\]/i' => ' $2$3 ( ~URL:base~$1 )', + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\]]+)\]\]/i' => ' $2 ( ~URL:base~$1 )', + + /// Absolute + '/\[\[(https?:)([^\|]+)\|([^\]]+)\]\]/i' => '$3 ( $1$2 )', + '/\[\[(https?:)([^\]]+)\]\]/i' => '$1$2', // @TODO + + /// Media + '/\{\{(?!https?:)([^\|]+)\|([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, + '/\{\{(?!https?:)([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . 
PHP_EOL, + + // List + '/^[\s]?-/' => '* ', + '/^[\s]+\*/' => '*', + + // Separators + '/[\\\]{2}/' => '~LINE:break~', + + // Plugins + '/~~DISCUSSION~~/' => '', // @TODO + '/~~INFO:syntaxplugins~~/' => '', // @TODO + + // Final corrections + '/[\n\r]+[.,;:]+/' => PHP_EOL + ]; + + public function __construct(?array $rules = null) // optional rule set that replaces the built-in one + { + if ($rules) + { + $this->_rule = $rules; + } + } + + // Macros operations + public function getMacroses(): array // returns the whole macros map (placeholder => replacement) + { + return $this->_macros; + } + + public function setMacroses(array $macros) // replaces the whole macros map + { + $this->_macros = $macros; + } + + public function getMacros(string $key, ?string $value = null): ?string // $value is unused, kept optional for backward compatibility + { + return $this->_macros[$key] ?? null; // null when the key is not registered + } + + public function setMacros(string $key, ?string $value): void // null removes the macros + { + if (null !== $value) // empty string is a valid replacement, only null means "remove" + { + $this->_macros[$key] = $value; + } + + else + { + unset( + $this->_macros[$key] + ); + } + } + + // Rule operations + public function getRules(): array // returns the whole rule set (regex => replacement) + { + return $this->_rule; + } + + public function setRules(array $rules) // replaces the whole rule set + { + $this->_rule = $rules; + } + + public function getRule(string $key, ?string $value = null): ?string // $value is unused, kept optional for backward compatibility + { + return $this->_rule[$key] ?? null; // null when the rule is not registered + } 
+ + public function setRule(string $key, ?string $value): void // null removes the rule + { + if (null !== $value) // empty replacement is valid (the built-in set uses it), only null means "remove" + { + $this->_rule[$key] = $value; + } + + else + { + unset( + $this->_rule[$key] + ); + } + } + + // Convert DokuWiki text to Gemini + public function toGemini(?string $data, ?array &$lines = []): ?string + { + if (empty($data)) + { + return null; + } + + $raw = false; + + $lines = []; + + foreach ((array) explode(PHP_EOL, $data) as $line) + { + // Skip any formatting in lines between code tag + if (!$raw && preg_match('/<(code|file)([^>]*)>/i', $line, $matches)) + { + // Prepend tag meta or filename as plain description + if (!empty($matches[0])) + { + $lines[] = preg_replace( + '/<(code|file)[\s-]*([^>]*)>/i', + '$2', + $matches[0] + ); + } + + $lines[] = '```'; + $lines[] = preg_replace( + '/<\/?(code|file)[^>]*>/i', + '', + $line + ); + + $raw = true; + + // Make sure inline tag closed + if (preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = '```'; + + $raw = false; + + continue; + } + + continue; + } + + if ($raw && preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = preg_replace( + '/<\/(code|file)>/i', + '', + $line + ); + + $lines[] = '```'; + + $raw = false; + + continue; + } + + if ($raw) + { + $lines[] = preg_replace( + '/^```/', + ' ```', + $line + ); + + continue; + } + + // Apply config + $lines[] = preg_replace( + array_keys( + $this->_rule + ), + array_values( + $this->_rule + ), + strip_tags( + $line + ) + ); + } + + // ASCII table + $table = false; + + $rows = []; + + $th = []; + + foreach ($lines as $index => $line) + { + // Strip line breaks + $line = str_replace( + '~LINE:break~', + ' ', + $line + ); + + // Header + if (!$table && preg_match_all('/\^([^\^]+)/', $line, $matches)) + { + if (!empty($matches[1])) + { + $table = true; + + $rows = []; + + $th = []; + + foreach ($matches[1] as $value) + { + $th[] = trim( + $value + ); + } + + unset( + 
$lines[$index] + ); + + continue; + } + } + + // Body + if ($table) + { + $table = false; 
+ + if (preg_match(sprintf('/%s\|/', str_repeat('\|(.*)', count($th))), $line, $matches)) + { + if (count($matches) == count($th) + 1) + { + $table = true; + + $row = []; + foreach ($th as $offset => $column) + { + $row[$column] = trim( + $matches[$offset + 1] + ); + } + + $rows[] = $row; + + unset( + $lines[$index] + ); + } + } + + if (!$table && $rows) + { + $builder = new ArrayToTextTable( + $rows + ); + + $lines[$index] = '```' . PHP_EOL . $builder->render() . PHP_EOL . '```'; + } + } + } + + // Merge lines + return preg_replace( + '/[\n\r]{2,}/', + PHP_EOL . PHP_EOL, + str_replace( + array_keys( + $this->_macros + ), + array_values( + $this->_macros + ), + implode( + PHP_EOL, + $lines + ) + ) + ); + } + + public function getH1(?string $gemini, ?string $regex = '/^[\s]?#([^#]+)/'): ?string // trimmed first capture of the first line matching $regex, or null + { + foreach ((array) explode(PHP_EOL, (string) $gemini) as $line) + { + preg_match( + $regex, + $line, + $matches + ); + + if (!empty($matches[1])) + { + return trim( + $matches[1] + ); + + break; + } + } + + return null; + } + + public function getLinks(?string $gemini, ?string $regex = '/(https?|gemini):\/\/\S+/'): array // unique trimmed matches of $regex, empty array for empty input + { + $links = []; + + if (empty($gemini)) + { + return $links; + } + + preg_match_all( + $regex, + $gemini, + $matches + ); + + if (!empty($matches[0])) + { + foreach ((array) $matches[0] as $link) + { + $links[] = trim( + $link + ); + } + } + + return array_unique( + $links + ); + } +} \ No newline at end of file diff --git a/src/Gemtext/Body.php b/src/Gemtext/Body.php new file mode 100644 index 0000000..3cfa3fb --- /dev/null +++ b/src/Gemtext/Body.php @@ -0,0 +1,235 @@ + $line) + { + $this->_lines[$index] = $line; + } + } + + public function getLine(int $index): ?string // stored line at $index, or null when absent + { + return isset($this->_lines[$index]) ? 
$this->_lines[$index] : null; + } + + public function getLines(): array + { + return $this->_lines; + } + + public function getH1(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^#([^#]+)/', trim($line), $match)) + { + $matches[$index] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getH2(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^##([^#]+)/', trim($line), $match)) + { + $matches[$index] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getH3(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^###([^#]+)/', trim($line), $match)) + { + $matches[$index] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getLinks(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^=>(.*)/', trim($line), $match)) + { + $matches[$index] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getQuote(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^>(.*)/', trim($line), $match)) + { + $matches[$index] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getCode(): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/^```(.*)/', trim($line), $match)) + { + $matches[$index] = empty($match[1]) ? null : trim($match[1]); + } + } + + return $matches; + } + + public function findLinks(string $protocol = 'gemini'): array + { + $matches = []; + + foreach ($this->_lines as $index => $line) + { + if (preg_match('/' . $protocol . 
':\/\/(.*)[\s\S\'"]*/', trim($line), $match)) + { + $matches[$index] = + sprintf( + '%s://%s', + $protocol, + trim( + $match[1] + ) + ); + } + } + + return $matches; + } + + public function skipTags(array $tags = []): string + { + $lines = []; + + foreach ($this->_lines as $line) + { + $line = trim( + $line + ); + + if ($tags) + { + foreach ($tags as $tag) + { + if(!in_array($tag, ['#', '##', '###', '=>', '*', '```'])) + { + continue; + } + + switch (true) + { + case str_starts_with($line, '#'): + + $line = preg_replace( + sprintf( + '/^%s([^#]+)/ui', + $tag + ), + '$1', + $line + ); + + break; + + case str_starts_with($line, '*'): + + $line = preg_replace( + '/^\*(.*)/ui', + '$1', + $line + ); + + break; + + default: + + $line = preg_replace( + sprintf( + '/^%s(.*)/ui', + $tag + ), + '$1', + $line + ); + } + } + } + + else + { + $line = preg_replace( + [ + '/^#([^#]+)/ui', + '/^##([^#]+)/ui', + '/^###([^#]+)/ui', + '/^=>(.*)/ui', + '/^\*(.*)/ui', + '/^```(.*)/ui', + ], + '$1', + $line + ); + } + + $lines[] = trim( + $line + ); + } + + return implode( + PHP_EOL, + $lines + ); + } +} \ No newline at end of file diff --git a/src/Gemtext/Link.php b/src/Gemtext/Link.php new file mode 100644 index 0000000..6557391 --- /dev/null +++ b/src/Gemtext/Link.php @@ -0,0 +1,69 @@ +_line = preg_replace( + '/^\s*=>(.*)/', + '$1', + trim( + $line + ) + ); + } + + public function getAddress(): ?string + { + if (preg_match('/^\s*([^\s]+)/', trim($this->_line), $match)) + { + return trim( + $match[1] + ); + } + + return null; + } + + public function getDate(?int &$timestamp = null): ?string + { + if (preg_match('/\s([\d]+-[\d+]+-[\d]+)\s/', trim($this->_line), $match)) + { + if ($result = strtotime($match[1])) + { + $timestamp = $result; + + return trim( + $match[1] + ); + } + } + + return null; + } + + public function getAlt(): ?string + { + if (preg_match('/\s[\d]+-[\d+]+-[\d]+\s(.*)$/', trim($this->_line), $match)) + { + return trim( + $match[1] + ); + } + + else if 
(preg_match('/\s(.*)$/', trim($this->_line), $match)) + { + return trim( + $match[1] + ); + } + + return null; + } +} \ No newline at end of file diff --git a/src/Gtk3/Pango.php b/src/Gtk3/Pango.php new file mode 100644 index 0000000..3a8099a --- /dev/null +++ b/src/Gtk3/Pango.php @@ -0,0 +1,225 @@ +getLines(); + + $raw = []; + + $escaped = []; + + // Code + $code = $body->getCode(); + + if (count($code) % 2 == 0) // make sure tags has pairs + { + $i = 1; + + foreach ($code as $index => $capture) + { + // Replace code tags + if ($i % 2 == 0) + { + $lines[$index] = ''; + + // Skip code format inside the tags by raw registry + foreach (array_slice($lines, $offset, $index - $offset) as $start => $line) + { + $raw[$start + $offset] = $line; + } + } + + else + { + if ($capture) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $capture + ) + ); + } + + else + { + $lines[$index] = ''; + } + + $offset = $index + 1; + } + + $escaped[] = $index; + + $i++; + } + } + + // H1 + foreach ($body->getH1() as $index => $value) + { + if (!isset($raw[$index])) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $value + ) + ); + + $escaped[] = $index; + } + } + + // H2 + foreach ($body->getH2() as $index => $value) + { + if (!isset($raw[$index])) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $value + ) + ); + + $escaped[] = $index; + } + } + + // H3 + foreach ($body->getH3() as $index => $value) + { + if (!isset($raw[$index])) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $value + ) + ); + + $escaped[] = $index; + } + } + + // Quote + foreach ($body->getQuote() as $index => $value) + { + if (!isset($raw[$index])) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $value + ) + ); + + $escaped[] = $index; + } + } + + // Links + foreach ($body->getLinks() as $index => $line) + { + if (!isset($raw[$index])) + { + $link = new \Yggverse\Gemini\Gemtext\Link( + $line + ); + + if (!$address = $link->getAddress()) + { + continue; + } + + if (!$alt = 
$link->getAlt()) + { + if (!$alt = $link->getDate()) + { + $alt = urldecode( + $address + ); + } + } + + if ($alt == $address) + { + $lines[$index] = sprintf( + '%s', + self::escape( + $address + ), + self::escape( + $alt + ) + ); + } + + else + { + $lines[$index] = sprintf( + '%s', + self::escape( + $address + ), + self::escape( + urldecode( + $address + ) + ), + self::escape( + $alt + ) + ); + } + + $escaped[] = $index; + } + } + + // Escape special chars for non escaped lines + foreach ($body->getLines() as $index => $value) + { + if (!in_array($index, $escaped)) + { + $lines[$index] = self::escape( + $value + ); + } + } + + return implode( + PHP_EOL, + $lines + ); + } + + public static function escape( + string $value + ): string + { + return htmlspecialchars( + $value + ); + } +} \ No newline at end of file