diff --git a/README.md b/README.md index 39de76d..65e732f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,6 @@ # gemini-php -PHP 8 Library for [Gemini Protocol](https://geminiprotocol.net) - -_For optimization reasons, some experimental features like `Dokuwiki` and `GTK3/Pango` was dropped from `1.0.0` release, but available in [previous versions](https://github.com/YGGverse/gemini-php/releases/tag/0.10.1). `Gemtext` component re-implemented as separated library (see [Extras](#extras))_ - -## Extras - -* [gemtext-php](https://github.com/YGGverse/gemtext-php) - Object-oriented PHP 8 library for Gemini / Gemtext operations +PHP 8 Library for Gemini Protocol ## Usage @@ -22,18 +16,18 @@ PHP interface for Gemini protocol queries by TLS socket connection ``` php $request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org:1965/index.gmi' + 'gemini://betahowto.duckdns.org:1965/archive' ); ``` **Resolved request (SNI)** -For direct connection provide resolved IP as the second argument +Optionally, provide the resolved IP as the second argument ``` php $request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org:1965/index.gmi' // target URL - '68.133.1.71' // resolved IP, skip to use system-wide resolver + 'gemini://betahowto.duckdns.org:1965/archive', // target URL + '94.140.114.89' // resolved IP, skip to use system-wide resolver ); ``` @@ -43,7 +37,7 @@ Alternatively, use `setResolvedHost` method of `Request` object before `getRespo ``` php $request->setResolvedHost( - '68.133.1.71' + '94.140.114.89' ) ``` @@ -71,27 +65,6 @@ var_dump( ); ``` -#### Request::getOptions -#### Request::setOptions - -``` php -$request = new \Yggverse\Gemini\Client\Request( - 'gemini://yggverse.cities.yesterweb.org', - '68.133.1.71' // make direct request to the resolved host -); - -$request->setOptions( - [ - 'ssl' => - [ - 'peer_name' => 'yggverse.cities.yesterweb.org', // SNI - 'verify_peer' => false, - 'verify_peer_name' => false - 
] - ] -); -``` - ### Response This class provides additional features for the raw response operations @@ -115,9 +88,361 @@ var_dump( ); ``` -## Integrations +## Gemtext -* [gemini-dl](https://github.com/YGGverse/gemini-dl) - CLI Batch downloader for Gemini Protocol -* [Yo!](https://github.com/YGGverse/Yo/tree/gemini) - Crawler for different networks -* [Yoda](https://github.com/YGGverse/Yoda) - PHP-GTK browser for Gemini Protocol -* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol \ No newline at end of file +Object-oriented API for Gemtext + +### Body + +Basic methods to work with `text/gemini` documents + +``` php +$body = new \Yggverse\Gemini\Gemtext\Body( + $response->getBody() // gemtext body from client response or .gmi file +); +``` + +#### Body::getH1 +#### Body::getH2 +#### Body::getH3 +#### Body::getLinks + +``` php +var_dump( + $body->getLinks() // returns array of inline links +); +``` + +#### Body::findLinks + +Find in-text links for the protocol given as argument (`gemini` by default) + +``` php +var_dump( + $body->findLinks('http') // returns array of http links found +); +``` + +#### Body::skipTags + +Strip Gemtext tags from the document + +``` php +var_dump( + $body->skipTags() // strip all tags +); + +var_dump( + $body->skipTags( + [ // 2- and 3-level headers only + "##", + "###" + ] + ) +); +``` + +### Link + +Inline links parser. 
+ +Extracts the address, the date (with its Unix timestamp) and the alt text from a given link line + +``` php +foreach ($body->getLinks() as $line) +{ + $link = new \Yggverse\Gemini\Gemtext\Link( + $line + ); + + var_dump( + $link->getAddress() + ); + + var_dump( + $link->getAlt() + ); +} +``` + +#### Link::getAddress +#### Link::getDate + +This method also validates the date format and returns the Unix timestamp through the by-reference argument + +``` php +var_dump( + $link->getDate( + $timestamp // get unix time from this variable + ) +); + +var_dump( + $timestamp +); +``` + +#### Link::getAlt + +## DokuWiki + +The toolkit provides a DokuWiki API for Gemini. + +It makes it simple to deploy new apps or to mirror an existing website + +### Examples + +* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol + +### Reader + +Read DokuWiki and convert to Gemini + +``` php +$reader = new \Yggverse\Gemini\Dokuwiki\Reader( + // optional regex rule set array +); +``` + +#### Reader::getRules +#### Reader::setRules +#### Reader::getRule +#### Reader::setRule + +Get or change an existing regex rule (or just skip this to use the built-in set) + +``` php +echo $reader->setRule( + '/subject/ui', + 'replacement' +); +``` + +#### Reader::getMacroses +#### Reader::setMacroses +#### Reader::getMacros +#### Reader::setMacros + +``` php +echo $reader->setMacros( + '~my-macros-key~', + '~my-macros-value~', +); +``` + +#### Reader::toGemini + +Convert DokuWiki text to Gemini markup + +As wiki pages contain a lot of inline links, to keep the converted document readable, this method does not replace links with new line `=>` macros, but uses inline context: `Name ( URL )`. This model is useful together with the `Reader::getLinks` method, which can be used, for example, to append all those related links to the document footer. + +If you don't like this implementation, feel free to change it with the `Reader::setRule` method! 
+ +``` php +echo $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); +``` + +#### Reader::getH1 + +Get document title + +``` php +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getH1( + $gemini +); +``` + +#### Reader::getLinks + +Get document links + +``` php +$gemini = $reader->toGemini( + file_get_contents( + '/host/data/pages/index.txt' + ) +); + +echo $reader->getLinks( + $gemini +); +``` + +### Filesystem + +Provides methods for simple and secure interaction with DokuWiki file storage + +``` php +$filesystem = new \Yggverse\Gemini\Dokuwiki\Filesystem( + '/host/data' // storage location +); +``` + +#### Filesystem::getList + +Return simple array of all files in storage + +``` php +var_dump ( + $filesystem->getList( + 'hello:world' + ) +); +``` + +#### Filesystem::getTree + +Return all files under the storage folder in tree format + +``` php +var_dump ( + $filesystem->getTree( + 'hello:world' + ) +); +``` + +#### Filesystem::getPagePathsByPath + +Return pages under the given data directory + +``` php +var_dump ( + $filesystem->getPagePathsByPath( + // absolute path to target data directory (e.g. 
Filesystem::getDirectoryPathByUri) + ) +); +``` + +#### Filesystem::getDirectoryPathByUri +#### Filesystem::getPagePathByUri + +Return absolute path to stored page file + +``` php +var_dump ( + $filesystem->getPagePathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getDirectoryUriByPath +#### Filesystem::getPageUriByPath + +Return page URI in `dokuwiki:format` + +``` php +var_dump ( + $filesystem->getPageUriByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getMediaPathByUri + +Return absolute path to stored media file + +``` php +var_dump ( + $filesystem->getMediaPathByUri( + 'hello:world' + ) +); +``` + +#### Filesystem::getMimeByPath + +Return the file MIME type if the path matches a storage item + +``` php +var_dump ( + $filesystem->getMimeByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::getDataByPath + +Return the file content if the path matches a storage item + +``` php +var_dump ( + $filesystem->getDataByPath( + '/full/path/to/page.txt' + ) +); +``` + +#### Filesystem::isPath + +Check that the path exists and matches a storage item + +``` php +var_dump ( + $filesystem->isPath( + '/full/path/to/page.txt' + ) +); +``` + +### Helper + +Helper methods to keep the controller codebase small + +``` php +$helper = new \Yggverse\Gemini\Dokuwiki\Helper( + new \Yggverse\Gemini\Dokuwiki\Filesystem(), + new \Yggverse\Gemini\Dokuwiki\Reader() +); +``` + +#### Helper::getChildrenSectionLinksByUri + +Return a simple array of child section links in Gemini format + +``` php +var_dump ( + $helper->getChildrenSectionLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getChildrenPageLinksByUri + +Return a simple array of child page links in Gemini format + +``` php +var_dump ( + $helper->getChildrenPageLinksByUri( + 'hello:world' + ) +); +``` + +#### Helper::getPageLinkByPath + +Return the page link (containing the document name) in Gemini format + +``` php +var_dump ( + $helper->getPageLinkByPath( + $filesystem->getPagePathByUri( + 'hello:world' + ) + ) +); +``` \ No newline at end 
of file diff --git a/composer.json b/composer.json index 33c1ae1..efc7aa6 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "yggverse/gemini", "description": "PHP 8 Library for Gemini Protocol", - "keywords": [ "yggverse", "gemini", "gemini-protocol", "client", "request", "response" ], + "keywords": [ "yggverse", "gemini", "wiki", "dokuwiki", "markdown" ], "homepage": "https://github.com/yggverse/gemini-php", "type": "library", "license": "MIT", @@ -10,5 +10,7 @@ "Yggverse\\Gemini\\": "src/" } }, - "require": {} + "require": { + "dekor/php-array-table": "^2.0" + } } diff --git a/src/Client/Request.php b/src/Client/Request.php index 183ce56..6b3583a 100644 --- a/src/Client/Request.php +++ b/src/Client/Request.php @@ -17,10 +17,8 @@ class Request [ 'ssl' => [ - 'allow_self_signed' => true, - 'disable_compression' => true, - 'verify_peer_name' => false, - 'verify_peer' => false + 'verify_peer' => false, + 'verify_peer_name' => false ] ]; @@ -35,9 +33,7 @@ class Request else { - throw new \Exception( - _('Host required') - ); + throw new Exception(); // @TODO } if ($port = parse_url($url, PHP_URL_PORT)) @@ -186,10 +182,7 @@ class Request $this->_host, $this->_port, $this->_path, - $this->_query ? 
sprintf( - '?%s', - $this->_query - ) : null + $this->_query ) ); diff --git a/src/Client/Response.php b/src/Client/Response.php index aabc7cf..5988423 100644 --- a/src/Client/Response.php +++ b/src/Client/Response.php @@ -17,7 +17,7 @@ class Response $match = []; preg_match( - '/^(?\d{2})(?.*)$/m', + '/(?\d{2})\s(?.*)\r\n(?.*)/su', $data, $match ); @@ -37,16 +37,14 @@ class Response if (isset($match['meta']) && mb_strlen($match['meta']) <= 1024) { $this->setMeta( - trim( - (string) $match['meta'] - ) + (string) $match['meta'] ); } - if ($body = substr($data, strpos($data, chr(10)) + 1)) + if (isset($match['body'])) { $this->setBody( - (string) $body + (string) (string) $match['body'] ); } } diff --git a/src/Dokuwiki/Filesystem.php b/src/Dokuwiki/Filesystem.php new file mode 100644 index 0000000..33b79c2 --- /dev/null +++ b/src/Dokuwiki/Filesystem.php @@ -0,0 +1,277 @@ +_path = rtrim( + $path, + '/' + ); + + $this->_index( + $this->_path + ); + } + + public function getTree(): array + { + return $this->_tree; + } + + public function getList(): array + { + return $this->_list; + } + + public function getPagePathsByPath(string $path): ?array + { + if (isset($this->_tree[$path])) + { + return $this->_tree[$path]; + } + + return null; + } + + public function getPagePathByUri(string $uri): ?string + { + $path = sprintf( + '%s/pages/%s.txt', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getPageUriByPath(string $path): ?string + { + if (!$this->isPath($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages/', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/', + '.txt' + ], + [ + ':', + null + ], + $path + ); + + return $path; + } + + public function getDirectoryPathByUri(string $uri = ''): ?string + { + $path = rtrim( + sprintf( + 
'%s/pages/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ), + '/' + ); + + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + return $path; + } + + public function getDirectoryUriByPath(string $path): ?string + { + if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) + { + return null; + } + + $path = str_replace( + sprintf( + '%s/pages', + $this->_path + ), + '', + $path + ); + + $path = trim( + $path, + '/' + ); + + $path = str_replace( + [ + '/' + ], + [ + ':' + ], + $path + ); + + return $path; + } + + public function getMediaPathByUri(string $uri): ?string + { + $path = sprintf( + '%s/media/%s', + $this->_path, + str_replace( + ':', + '/', + mb_strtolower( + urldecode( + $uri + ) + ) + ) + ); + + if (!$this->isPath($path)) + { + return null; + } + + return $path; + } + + public function getMimeByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($mime = mime_content_type($path)) + { + return $mime; + } + } + + return null; + } + + public function getDataByPath(?string $path): ?string + { + if ($this->isPath($path)) + { + if ($data = file_get_contents($path)) + { + return $data; + } + } + + return null; + } + + public function isPath(?string $path): bool + { + if (in_array($path, $this->_list) && is_file($path) && is_readable($path)) + { + return true; + } + + return false; + } + + private function _index( + string $path, + ?array $blacklist = ['sidebar.txt', '__template.txt'] + ): void + { + foreach ((array) scandir($path) as $file) + { + if (str_starts_with($file, '.')) + { + continue; + } + + if (is_link($file)) + { + continue; + } + + if (in_array($file, $blacklist)) + { + continue; + } + + $file = sprintf( + '%s/%s', + $path, + $file + ); + + switch (true) + { + case is_dir($file): + + if (!isset($this->_tree[$path])) + { + $this->_tree[$path] = []; + } + + $this->_index($file); + + break; + + case is_file($file): 
+ + $this->_tree[$path][] = $file; + + $this->_list[] = $file; + + break; + } + } + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Helper.php b/src/Dokuwiki/Helper.php new file mode 100644 index 0000000..232b10f --- /dev/null +++ b/src/Dokuwiki/Helper.php @@ -0,0 +1,154 @@ +_filesystem = $filesystem; + $this->_reader = $reader; + } + + public function getChildrenSectionLinksByUri(?string $uri = ''): array + { + $sections = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getTree() as $path => $files) + { + if (str_starts_with($path, $directory) && $path != $directory) + { + // Init link name + $h1 = null; + + // Init this directory URI + $thisUri = $this->_filesystem->getDirectoryUriByPath( + $path + ); + + // Skip sections deeper this level + if (substr_count($thisUri, ':') > ($uri ? substr_count($uri, ':') + 1 : 0)) + { + continue; + } + + // Get section names + $segments = []; + + foreach ((array) explode(':', $thisUri) as $segment) + { + $segments[] = $segment; + + // Find section index if exists + if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments) . ':' . 
$segment)) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Find section page if exists + else if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments))) + { + $h1 = $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $file + ) + ) + ); + } + + // Reset title of undefined segment + else + { + $h1 = null; + } + } + + // Register section link + $sections[] = sprintf( + '=> /%s %s', + $thisUri, + $h1 + ); + } + } + } + + // Keep unique + $sections = array_unique( + $sections + ); + + // Sort asc + sort( + $sections + ); + + return $sections; + } + + public function getChildrenPageLinksByUri(?string $uri = ''): array + { + $pages = []; + + if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) + { + foreach ((array) $this->_filesystem->getPagePathsByPath($directory) as $file) + { + if ($link = $this->getPageLinkByPath($file)) + { + $pages[] = $link; + } + } + } + + // Keep unique + $pages = array_unique( + $pages + ); + + // Sort asc + sort( + $pages + ); + + return $pages; + } + + public function getPageLinkByPath(string $path): ?string + { + if (in_array($path, $this->_filesystem->getList()) && is_file($path) && is_readable($path)) + { + return sprintf( + '=> /%s %s', + $this->_filesystem->getPageUriByPath( + $path + ), + $this->_reader->getH1( + $this->_reader->toGemini( + $this->_filesystem->getDataByPath( + $path + ) + ) + ) + ); + } + + return null; + } +} \ No newline at end of file diff --git a/src/Dokuwiki/Reader.php b/src/Dokuwiki/Reader.php new file mode 100644 index 0000000..95accef --- /dev/null +++ b/src/Dokuwiki/Reader.php @@ -0,0 +1,412 @@ + null, + '~IPv6:open~' => '[', + '~IPv6:close~' => ']', + '~LINE:break~' => PHP_EOL + ]; + + private array $_rule = + [ + // Headers + '/^([\s]*)#([^#]+)/' => '$1#$2' . PHP_EOL, + '/^([\s]*)##([^#]+)/' => '$1##$2' . PHP_EOL, + '/^([\s]*)###([^#]+)/' => '$1###$2' . 
PHP_EOL, + '/^([\s]*)####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)#####([^#]+)/' => '$1###$2' . PHP_EOL, + '/^([\s]*)######([^#]+)/' => '$1###$2' . PHP_EOL, + + '/^[\s]*[=]{6}([^=]+)[=]{6}/' => '# $1' . PHP_EOL, + '/^[\s]*[=]{5}([^=]+)[=]{5}/' => '## $1' . PHP_EOL, + '/^[\s]*[=]{4}([^=]+)[=]{4}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{3}([^=]+)[=]{3}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{2}([^=]+)[=]{2}/' => '### $1' . PHP_EOL, + '/^[\s]*[=]{1}([^=]+)[=]{1}/' => '### $1' . PHP_EOL, + + // Tags + '/\*\*/' => '', + '/\'\'/' => '', + '/\%\%/' => '', + '/(? '', + + // Remove extra spaces + '/(\s)\s+/' => '$1', + + // Links + + /// Detect IPv6 (used as no idea how to resolve square quotes in rules below) + '/\[\[([^\[]+)\[([A-f:0-9]*)\]([^\]]+)\]\]/' => '$1~IPv6:open~$2~IPv6:close~$3', + + /// Remove extra chars + '/\[\[\s*\:?([^\|]+)\s*\|\s*([^\]]+)\s*\]\]/' => '[[$1|$2]]', + '/\[\[\s*\:?([^\]]+)\s*\]\]/' => '[[$1]]', + + '/\{\{\s*\:?([^\|]+)\s*\|\s*([^\}]+)\s*\}\}/' => '{{$1|$2}}', + '/\{\{\s*\:?([^\}]+)\s*\}\}/' => '{{$1}}', + + /// Wikipedia + '/\[\[wp([A-z]{2,})>([^\|]+)\|([^\]]+)\]\]/ui' => '$3 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\|]+)\|([^\]]+)\]\]/i' => '$2 ( https://en.wikipedia.org/wiki/$1 )', + '/\[\[wp([A-z]{2,})>([^\]]+)\]\]/i' => '$2 ( https://$1.wikipedia.org/wiki/$2 )', + '/\[\[wp>([^\]]+)\]\]/i' => '$1 ( https://en.wikipedia.org/wiki/$1 )', + + /// Dokuwiki + '/\[\[doku>([^\|]+)\|([^\]]+)\]\]/i' => '$2( https://www.dokuwiki.org/$1 )', + '/\[\[doku>([^\]]+)\]\]/i' => '$1( https://www.dokuwiki.org/$1 )', + + /// Index + /// Useful with src/Dokuwiki/Helper.php + '/\{\{indexmenu>:([^\}]+)\}\}/i' => '', + '/\{\{indexmenu_n>[\d]+\}\}/i' => '', + + // Related + '/\[\[this>([^\|]+)\|([^\]]+)\]\]/i' => '$2', + + /// Relative + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\|]+)\|([^\]]+)\]\]/i' => ' $2$3 ( ~URL:base~$1 )', + '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\]]+)\]\]/i' => ' $2 ( ~URL:base~$1 )', + + /// Absolute + 
'/\[\[(https?:)([^\|]+)\|([^\]]+)\]\]/i' => '$3 ( $1$2 )', + '/\[\[(https?:)([^\]]+)\]\]/i' => '$1$2', // @TODO + + /// Media + '/\{\{(?!https?:)([^\|]+)\|([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, + '/\{\{(?!https?:)([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, + + // List + '/^[\s]?-/' => '* ', + '/^[\s]+\*/' => '*', + + // Separators + '/[\\\]{2}/' => '~LINE:break~', + + // Plugins + '/~~DISCUSSION~~/' => '', // @TODO + '/~~INFO:syntaxplugins~~/' => '', // @TODO + + // Final corrections + '/[\n\r]+[.,;:]+/' => PHP_EOL + ]; + + public function __construct(?array $rules = null) + { + if ($rules) + { + $this->_rule = $rules; + } + } + + // Macros operations + public function getMacroses(): array + { + $this->_macros; + } + + public function setMacroses(array $macros) + { + $this->_macros = $macros; + } + + public function getMacros(string $key, string $value): ?string + { + $this->_macros[$key] = isset($this->_macros[$key]) ? $value : null; + } + + public function setMacros(string $key, ?string $value): void + { + if ($value) + { + $this->_macros[$key] = $value; + } + + else + { + unset( + $this->_macros[$key] + ); + } + } + + // Rule operations + public function getRules(): array + { + $this->_rule; + } + + public function setRules(array $rules) + { + $this->_rule = $rules; + } + + public function getRule(string $key, string $value): ?string + { + $this->_rule[$key] = isset($this->_rule[$key]) ? 
$value : null; + } + + public function setRule(string $key, ?string $value): void + { + if ($value) + { + $this->_rule[$key] = $value; + } + + else + { + unset( + $this->_rule[$key] + ); + } + } + + // Convert DokuWiki text to Gemini + public function toGemini(?string $data, ?array &$lines = []): ?string + { + if (empty($data)) + { + return null; + } + + $raw = false; + + $lines = []; + + foreach ((array) explode(PHP_EOL, $data) as $line) + { + // Skip any formatting in lines between code tag + if (!$raw && preg_match('/<(code|file)([^>]*)>/i', $line, $matches)) + { + // Prepend tag meta or filename as plain description + if (!empty($matches[0])) + { + $lines[] = preg_replace( + '/<(code|file)[\s-]*([^>]*)>/i', + '$2', + $matches[0] + ); + } + + $lines[] = '```'; + $lines[] = preg_replace( + '/<\/?(code|file)[^>]*>/i', + '', + $line + ); + + $raw = true; + + // Make sure inline tag closed + if (preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = '```'; + + $raw = false; + + continue; + } + + continue; + } + + if ($raw && preg_match('/<\/(code|file)>/i', $line)) + { + $lines[] = preg_replace( + '/<\/(code|file)>/i', + '', + $line + ); + + $lines[] = '```'; + + $raw = false; + + continue; + } + + if ($raw) + { + $lines[] = preg_replace( + '/^```/', + ' ```', + $line + ); + + continue; + } + + // Apply config + $lines[] = preg_replace( + array_keys( + $this->_rule + ), + array_values( + $this->_rule + ), + strip_tags( + $line + ) + ); + } + + // ASCII table + $table = false; + + $rows = []; + + $th = []; + + foreach ($lines as $index => $line) + { + // Strip line breaks + $line = str_replace( + '~LINE:break~', + ' ', + $line + ); + + // Header + if (!$table && preg_match_all('/\^([^\^]+)/', $line, $matches)) + { + if (!empty($matches[1])) + { + $table = true; + + $rows = []; + + $th = []; + + foreach ($matches[1] as $value) + { + $th[] = trim( + $value + ); + } + + unset( + $lines[$index] + ); + + continue; + } + } + + // Body + if ($table) + { + $table = false; 
+ + if (preg_match(sprintf('/%s\|/', str_repeat('\|(.*)', count($th))), $line, $matches)) + { + if (count($matches) == count($th) + 1) + { + $table = true; + + $row = []; + foreach ($th as $offset => $column) + { + $row[$column] = trim( + $matches[$offset + 1] + ); + } + + $rows[] = $row; + + unset( + $lines[$index] + ); + } + } + + if (!$table && $rows) + { + $builder = new ArrayToTextTable( + $rows + ); + + $lines[$index] = '```' . PHP_EOL . $builder->render() . PHP_EOL . '```'; + } + } + } + + // Merge lines + return preg_replace( + '/[\n\r]{2,}/', + PHP_EOL . PHP_EOL, + str_replace( + array_keys( + $this->_macros + ), + array_values( + $this->_macros + ), + implode( + PHP_EOL, + $lines + ) + ) + ); + } + + public function getH1(?string $gemini, ?string $regex = '/^[\s]?#([^#]+)/'): ?string + { + foreach ((array) explode(PHP_EOL, (string) $gemini) as $line) + { + preg_match( + $regex, + $line, + $matches + ); + + if (!empty($matches[1])) + { + return trim( + $matches[1] + ); + + break; + } + } + + return null; + } + + public function getLinks(?string $gemini, ?string $regex = '/(https?|gemini):\/\/\S+/'): array + { + $links = []; + + if (empty($gemini)) + { + return $links; + } + + preg_match_all( + $regex, + $gemini, + $matches + ); + + if (!empty($matches[0])) + { + foreach ((array) $matches[0] as $link) + { + $links[] = trim( + $link + ); + } + } + + return array_unique( + $links + ); + } +} \ No newline at end of file diff --git a/src/Gemtext/Body.php b/src/Gemtext/Body.php new file mode 100644 index 0000000..5e18fd0 --- /dev/null +++ b/src/Gemtext/Body.php @@ -0,0 +1,193 @@ +_lines[] = $line; + } + } + + public function getH1(): array + { + $matches = []; + + foreach ($this->_lines as $line) + { + if (preg_match('/^#([^#]+)/', trim($line), $match)) + { + $matches[] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getH2(): array + { + $matches = []; + + foreach ($this->_lines as $line) + { + if (preg_match('/^##([^#]+)/', 
trim($line), $match)) + { + $matches[] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getH3(): array + { + $matches = []; + + foreach ($this->_lines as $line) + { + if (preg_match('/^###([^#]+)/', trim($line), $match)) + { + $matches[] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function getLinks(): array + { + $matches = []; + + foreach ($this->_lines as $line) + { + if (preg_match('/^=>(.*)/', trim($line), $match)) + { + $matches[] = trim( + $match[1] + ); + } + } + + return $matches; + } + + public function findLinks(string $protocol = 'gemini'): array + { + $matches = []; + + foreach ($this->_lines as $line) + { + if (preg_match('/' . $protocol . ':\/\/(.*)[\s\S\'"]*/', trim($line), $match)) + { + $matches[] = + sprintf( + '%s://%s', + $protocol, + trim( + $match[1] + ) + ); + } + } + + return $matches; + } + + public function skipTags(array $tags = []): string + { + $lines = []; + + foreach ($this->_lines as $line) + { + $line = trim( + $line + ); + + if ($tags) + { + foreach ($tags as $tag) + { + if(!in_array($tag, ['#', '##', '###', '=>', '*', '```'])) + { + continue; + } + + switch (true) + { + case str_starts_with($line, '#'): + + $line = preg_replace( + sprintf( + '/^%s([^#]+)/ui', + $tag + ), + '$1', + $line + ); + + break; + + case str_starts_with($line, '*'): + + $line = preg_replace( + '/^\*(.*)/ui', + '$1', + $line + ); + + break; + + default: + + $line = preg_replace( + sprintf( + '/^%s(.*)/ui', + $tag + ), + '$1', + $line + ); + } + } + } + + else + { + $line = preg_replace( + [ + '/^#([^#]+)/ui', + '/^##([^#]+)/ui', + '/^###([^#]+)/ui', + '/^=>(.*)/ui', + '/^\*(.*)/ui', + '/^```(.*)/ui', + ], + '$1', + $line + ); + } + + $lines[] = trim( + $line + ); + } + + return implode( + PHP_EOL, + $lines + ); + } +} \ No newline at end of file diff --git a/src/Gemtext/Link.php b/src/Gemtext/Link.php new file mode 100644 index 0000000..f7ed077 --- /dev/null +++ b/src/Gemtext/Link.php @@ -0,0 +1,63 @@ 
+_line = $line;
+    }
+
+    /**
+     * Return the link address: the first whitespace-delimited token of the
+     * trimmed line.
+     *
+     * @return string|null Address, or null when the trimmed line is empty
+     */
+    public function getAddress(): ?string
+    {
+        // The label after the address is optional in a link line, so no
+        // trailing whitespace is required here (the previous pattern returned
+        // null for label-less links such as "gemini://host/path")
+        if (preg_match('/^(\S+)/', trim($this->_line), $match))
+        {
+            return trim(
+                $match[1]
+            );
+        }
+
+        return null;
+    }
+
+    /**
+     * Return the first whitespace-preceded "digits-digits-digits" sequence of
+     * the line, provided strtotime() is able to parse it.
+     *
+     * @param int|null $timestamp Receives the Unix timestamp produced by
+     *                            strtotime() on success; left untouched otherwise
+     *
+     * @return string|null Date exactly as written in the line, or null when no
+     *                     date is found or strtotime() rejects it
+     */
+    public function getDate(?int &$timestamp = null): ?string
+    {
+        // \d+ replaces [\d+]+, which also accepted a literal "+";
+        // (?:\s|$) lets a date that ends the line be recognised as well
+        if (preg_match('/\s(\d+-\d+-\d+)(?:\s|$)/', trim($this->_line), $match))
+        {
+            $result = strtotime($match[1]);
+
+            // Compare with false explicitly: timestamp 0 is a valid result
+            if ($result !== false)
+            {
+                $timestamp = $result;
+
+                return trim(
+                    $match[1]
+                );
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Return the link label (alt text).
+     *
+     * When a date followed by whitespace is present, the text after it is
+     * returned; otherwise everything after the first whitespace is returned.
+     *
+     * @return string|null Trimmed label, or null when the line contains no
+     *                     whitespace-separated part after the address
+     */
+    public function getAlt(): ?string
+    {
+        $line = trim($this->_line);
+
+        // Label that follows a date (\d+ replaces the faulty [\d+]+ class)
+        if (preg_match('/\s\d+-\d+-\d+\s(.*)$/', $line, $match))
+        {
+            return trim(
+                $match[1]
+            );
+        }
+
+        // No dated label: take the remainder after the first whitespace
+        if (preg_match('/\s(.*)$/', $line, $match))
+        {
+            return trim(
+                $match[1]
+            );
+        }
+
+        return null;
+    }
+}
\ No newline at end of file