diff --git a/README.md b/README.md index 5eaa349..0fe3a06 100644 --- a/README.md +++ b/README.md @@ -109,269 +109,6 @@ var_dump( ); ``` -## DokuWiki - -Toolkit provides DokuWiki API for Gemini. - -Allows to simple deploy new apps or make existing website mirror - -### Examples - -* [β-Doku](https://github.com/YGGverse/bdoku) - DokuWiki Satellite for Gemini Protocol - -### Reader - -Read DokuWiki and convert to Gemini - -``` php -$reader = new \Yggverse\Gemini\Dokuwiki\Reader( - // optional regex rule set array -); -``` - -#### Reader::getRules -#### Reader::setRules -#### Reader::getRule -#### Reader::setRule - -Get or change existing regex rule (or just skip by using build-in set) - -``` php -echo $reader->setRule( - '/subject/ui', - 'replacement' -); -``` - -#### Reader::getMacroses -#### Reader::setMacroses -#### Reader::getMacros -#### Reader::setMacros - -``` php -echo $reader->setMacros( - '~my-macros-key~', - '~my-macros-value~', -); -``` - -#### Reader::toGemini - -Convert DokuWiki text to Gemini markup - -As wiki has lot of inline links, to make converted document well-readable, this method does not replace links with new line `=>` macros, but uses inline context: `Name ( URL )`. This model useful with `Reader::getLinks` method, that for example appends all those related links to the document footer. - -If you don't like this implementation, feel free to change it by `Reader::setRule` method! - -``` php -echo $reader->toGemini( - file_get_contents( - '/host/data/pages/index.txt' - ) -); -``` - -#### Reader::getH1 - -Get document title - -``` php -$gemini = $reader->toGemini( - file_get_contents( - '/host/data/pages/index.txt' - ) -); - -echo $reader->getH1( - $gemini -); -``` - -#### Reader::getLinks - -Get document links - -``` php -$gemini = $reader->toGemini( - file_get_contents( - '/host/data/pages/index.txt' - ) -); - -echo $reader->getLinks( - $gemini -); -``` - -### Filesystem - -Provides methods for simple and secure interaction with DokuWiki file storage - -``` php -$filesystem = new \Yggverse\Gemini\Dokuwiki\Filesystem( - '/host/data' // storage location -); -``` - -#### Filesystem::getList - -Return simple array of all files in storage - -``` php -var_dump ( - $filesystem->getList( - 'hello:world' - ) -); -``` - -#### Filesystem::getTree - -Return all files under the storage folder in tree format - -``` php -var_dump ( - $filesystem->getTree( - 'hello:world' - ) -); -``` - -#### Filesystem::getPagePathsByPath - -Return pages under the given data directory - -``` php -var_dump ( - $filesystem->getPagePathsByPath( - // absolute path to target data directory (e.g. Filesystem::getDirectoryPathByUri) - ) -); -``` - -#### Filesystem::getDirectoryPathByUri -#### Filesystem::getPagePathByUri - -Return absolute path to stored page file - -``` php -var_dump ( - $filesystem->getPagePathByUri( - 'hello:world' - ) -); -``` - -#### Filesystem::getDirectoryUriByPath -#### Filesystem::getPageUriByPath - -Return page URI in `dokuwiki:format` - -``` php -var_dump ( - $filesystem->getPageUriByPath( - '/full/path/to/page.txt' - ) -); -``` - -#### Filesystem::getMediaPathByUri - -Return absolute path to stored media file - -``` php -var_dump ( - $filesystem->getMediaPathByUri( - 'hello:world' - ) -); -``` - -#### Filesystem::getMimeByPath - -Return file MIME if path match storage item - -``` php -var_dump ( - $filesystem->getMimeByPath( - '/full/path/to/page.txt' - ) -); -``` - -#### Filesystem::getDataByPath - -Return file content if path match storage item - -``` php -var_dump ( - $filesystem->getDataByPath( - '/full/path/to/page.txt' - ) -); -``` - -#### Filesystem::isPath - -Check path exist and match storage item - -``` php -var_dump ( - $filesystem->isPath( - '/full/path/to/page.txt' - ) -); -``` - -### Helper - -Useful methods to minify controller codebase - -``` php -$helper = new \Yggverse\Gemini\Dokuwiki\Helper( - new \Yggverse\Gemini\Dokuwiki\Filesystem(), - new \Yggverse\Gemini\Dokuwiki\Reader() -); -``` - -#### Helper::getChildrenSectionLinksByUri - -Return simple array of children section links in Gemini format - -``` php -var_dump ( - $helper->getChildrenSectionLinksByUri( - 'hello:world' - ) -); -``` - -#### Helper::getChildrenPageLinksByUri - -Return simple array of children page links in Gemini format - -``` php -var_dump ( - $helper->getChildrenPageLinksByUri( - 'hello:world' - ) -); -``` - -#### Helper::getPageLinkByPath - -Return page link (that contain document name) in Gemini format - -``` php -var_dump ( - $helper->getPageLinkByPath( - $filesystem->getPagePathByUri( - 'hello:world' - ) - ) -); -``` - ## Integrations * [β-Doku is DokuWiki Satellite for Gemini Protocol](https://github.com/YGGverse/bdoku) diff --git a/src/Dokuwiki/Filesystem.php b/src/Dokuwiki/Filesystem.php deleted file mode 100644 index 33b79c2..0000000 --- a/src/Dokuwiki/Filesystem.php +++ /dev/null @@ -1,277 +0,0 @@ -_path = rtrim( - $path, - '/' - ); - - $this->_index( - $this->_path - ); - } - - public function getTree(): array - { - return $this->_tree; - } - - public function getList(): array - { - return $this->_list; - } - - public function getPagePathsByPath(string $path): ?array - { - if (isset($this->_tree[$path])) - { - return $this->_tree[$path]; - } - - return null; - } - - public function getPagePathByUri(string $uri): ?string - { - $path = sprintf( - '%s/pages/%s.txt', - $this->_path, - str_replace( - ':', - '/', - mb_strtolower( - urldecode( - $uri - ) - ) - ) - ); - - if (!$this->isPath($path)) - { - return null; - } - - return $path; - } - - public function getPageUriByPath(string $path): ?string - { - if (!$this->isPath($path)) - { - return null; - } - - $path = str_replace( - sprintf( - '%s/pages/', - $this->_path - ), - '', - $path - ); - - $path = trim( - $path, - '/' - ); - - $path = str_replace( - [ - '/', - '.txt' - ], - [ - ':', - null - ], - $path - ); - - return $path; - } - - public function getDirectoryPathByUri(string $uri = ''): ?string - { - $path = rtrim( - sprintf( - '%s/pages/%s', - $this->_path, - str_replace( - ':', - '/', - mb_strtolower( - urldecode( - $uri - ) - ) - ) - ), - '/' - ); - - if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) - { - return null; - } - - return $path; - } - - public function getDirectoryUriByPath(string $path): ?string - { - if (!isset($this->_tree[$path]) || !is_dir($path) || !is_readable($path)) - { - return null; - } - - $path = str_replace( - sprintf( - '%s/pages', - $this->_path - ), - '', - $path - ); - - $path = trim( - $path, - '/' - ); - - $path = str_replace( - [ - '/' - ], - [ - ':' - ], - $path - ); - - return $path; - } - - public function getMediaPathByUri(string $uri): ?string - { - $path = sprintf( - '%s/media/%s', - $this->_path, - str_replace( - ':', - '/', - mb_strtolower( - urldecode( - $uri - ) - ) - ) - ); - - if (!$this->isPath($path)) - { - return null; - } - - return $path; - } - - public function getMimeByPath(?string $path): ?string - { - if ($this->isPath($path)) - { - if ($mime = mime_content_type($path)) - { - return $mime; - } - } - - return null; - } - - public function getDataByPath(?string $path): ?string - { - if ($this->isPath($path)) - { - if ($data = file_get_contents($path)) - { - return $data; - } - } - - return null; - } - - public function isPath(?string $path): bool - { - if (in_array($path, $this->_list) && is_file($path) && is_readable($path)) - { - return true; - } - - return false; - } - - private function _index( - string $path, - ?array $blacklist = ['sidebar.txt', '__template.txt'] - ): void - { - foreach ((array) scandir($path) as $file) - { - if (str_starts_with($file, '.')) - { - continue; - } - - if (is_link($file)) - { - continue; - } - - if (in_array($file, $blacklist)) - { - continue; - } - - $file = sprintf( - '%s/%s', - $path, - $file - ); - - switch (true) - { - case is_dir($file): - - if (!isset($this->_tree[$path])) - { - $this->_tree[$path] = []; - } - - $this->_index($file); - - break; - - case is_file($file): - - $this->_tree[$path][] = $file; - - $this->_list[] = $file; - - break; - } - } - } -} \ No newline at end of file diff --git a/src/Dokuwiki/Helper.php b/src/Dokuwiki/Helper.php deleted file mode 100644 index 232b10f..0000000 --- a/src/Dokuwiki/Helper.php +++ /dev/null @@ -1,154 +0,0 @@ -_filesystem = $filesystem; - $this->_reader = $reader; - } - - public function getChildrenSectionLinksByUri(?string $uri = ''): array - { - $sections = []; - - if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) - { - foreach ((array) $this->_filesystem->getTree() as $path => $files) - { - if (str_starts_with($path, $directory) && $path != $directory) - { - // Init link name - $h1 = null; - - // Init this directory URI - $thisUri = $this->_filesystem->getDirectoryUriByPath( - $path - ); - - // Skip sections deeper this level - if (substr_count($thisUri, ':') > ($uri ? substr_count($uri, ':') + 1 : 0)) - { - continue; - } - - // Get section names - $segments = []; - - foreach ((array) explode(':', $thisUri) as $segment) - { - $segments[] = $segment; - - // Find section index if exists - if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments) . ':' . $segment)) - { - $h1 = $this->_reader->getH1( - $this->_reader->toGemini( - $this->_filesystem->getDataByPath( - $file - ) - ) - ); - } - - // Find section page if exists - else if ($file = $this->_filesystem->getPagePathByUri(implode(':', $segments))) - { - $h1 = $this->_reader->getH1( - $this->_reader->toGemini( - $this->_filesystem->getDataByPath( - $file - ) - ) - ); - } - - // Reset title of undefined segment - else - { - $h1 = null; - } - } - - // Register section link - $sections[] = sprintf( - '=> /%s %s', - $thisUri, - $h1 - ); - } - } - } - - // Keep unique - $sections = array_unique( - $sections - ); - - // Sort asc - sort( - $sections - ); - - return $sections; - } - - public function getChildrenPageLinksByUri(?string $uri = ''): array - { - $pages = []; - - if ($directory = $this->_filesystem->getDirectoryPathByUri($uri)) - { - foreach ((array) $this->_filesystem->getPagePathsByPath($directory) as $file) - { - if ($link = $this->getPageLinkByPath($file)) - { - $pages[] = $link; - } - } - } - - // Keep unique - $pages = array_unique( - $pages - ); - - // Sort asc - sort( - $pages - ); - - return $pages; - } - - public function getPageLinkByPath(string $path): ?string - { - if (in_array($path, $this->_filesystem->getList()) && is_file($path) && is_readable($path)) - { - return sprintf( - '=> /%s %s', - $this->_filesystem->getPageUriByPath( - $path - ), - $this->_reader->getH1( - $this->_reader->toGemini( - $this->_filesystem->getDataByPath( - $path - ) - ) - ) - ); - } - - return null; - } -} \ No newline at end of file diff --git a/src/Dokuwiki/Reader.php b/src/Dokuwiki/Reader.php deleted file mode 100644 index 95accef..0000000 --- a/src/Dokuwiki/Reader.php +++ /dev/null @@ -1,412 +0,0 @@ - null, - '~IPv6:open~' => '[', - '~IPv6:close~' => ']', - '~LINE:break~' => PHP_EOL - ]; - - private array $_rule = - [ - // Headers - '/^([\s]*)#([^#]+)/' => '$1#$2' . PHP_EOL, - '/^([\s]*)##([^#]+)/' => '$1##$2' . PHP_EOL, - '/^([\s]*)###([^#]+)/' => '$1###$2' . PHP_EOL, - '/^([\s]*)####([^#]+)/' => '$1###$2' . PHP_EOL, - '/^([\s]*)#####([^#]+)/' => '$1###$2' . PHP_EOL, - '/^([\s]*)######([^#]+)/' => '$1###$2' . PHP_EOL, - - '/^[\s]*[=]{6}([^=]+)[=]{6}/' => '# $1' . PHP_EOL, - '/^[\s]*[=]{5}([^=]+)[=]{5}/' => '## $1' . PHP_EOL, - '/^[\s]*[=]{4}([^=]+)[=]{4}/' => '### $1' . PHP_EOL, - '/^[\s]*[=]{3}([^=]+)[=]{3}/' => '### $1' . PHP_EOL, - '/^[\s]*[=]{2}([^=]+)[=]{2}/' => '### $1' . PHP_EOL, - '/^[\s]*[=]{1}([^=]+)[=]{1}/' => '### $1' . PHP_EOL, - - // Tags - '/\*\*/' => '', - '/\'\'/' => '', - '/\%\%/' => '', - '/(? '', - - // Remove extra spaces - '/(\s)\s+/' => '$1', - - // Links - - /// Detect IPv6 (used as no idea how to resolve square quotes in rules below) - '/\[\[([^\[]+)\[([A-f:0-9]*)\]([^\]]+)\]\]/' => '$1~IPv6:open~$2~IPv6:close~$3', - - /// Remove extra chars - '/\[\[\s*\:?([^\|]+)\s*\|\s*([^\]]+)\s*\]\]/' => '[[$1|$2]]', - '/\[\[\s*\:?([^\]]+)\s*\]\]/' => '[[$1]]', - - '/\{\{\s*\:?([^\|]+)\s*\|\s*([^\}]+)\s*\}\}/' => '{{$1|$2}}', - '/\{\{\s*\:?([^\}]+)\s*\}\}/' => '{{$1}}', - - /// Wikipedia - '/\[\[wp([A-z]{2,})>([^\|]+)\|([^\]]+)\]\]/ui' => '$3 ( https://$1.wikipedia.org/wiki/$2 )', - '/\[\[wp>([^\|]+)\|([^\]]+)\]\]/i' => '$2 ( https://en.wikipedia.org/wiki/$1 )', - '/\[\[wp([A-z]{2,})>([^\]]+)\]\]/i' => '$2 ( https://$1.wikipedia.org/wiki/$2 )', - '/\[\[wp>([^\]]+)\]\]/i' => '$1 ( https://en.wikipedia.org/wiki/$1 )', - - /// Dokuwiki - '/\[\[doku>([^\|]+)\|([^\]]+)\]\]/i' => '$2( https://www.dokuwiki.org/$1 )', - '/\[\[doku>([^\]]+)\]\]/i' => '$1( https://www.dokuwiki.org/$1 )', - - /// Index - /// Useful with src/Dokuwiki/Helper.php - '/\{\{indexmenu>:([^\}]+)\}\}/i' => '', - '/\{\{indexmenu_n>[\d]+\}\}/i' => '', - - // Related - '/\[\[this>([^\|]+)\|([^\]]+)\]\]/i' => '$2', - - /// Relative - '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\|]+)\|([^\]]+)\]\]/i' => ' $2$3 ( ~URL:base~$1 )', - '/\[\[(?!https?:|this|doku|wp[A-z]{0,2})([^\]]+)\]\]/i' => ' $2 ( ~URL:base~$1 )', - - /// Absolute - '/\[\[(https?:)([^\|]+)\|([^\]]+)\]\]/i' => '$3 ( $1$2 )', - '/\[\[(https?:)([^\]]+)\]\]/i' => '$1$2', // @TODO - - /// Media - '/\{\{(?!https?:)([^\|]+)\|([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, - '/\{\{(?!https?:)([^\}]+)\}\}/i' => PHP_EOL . '=> /$1$2' . PHP_EOL, - - // List - '/^[\s]?-/' => '* ', - '/^[\s]+\*/' => '*', - - // Separators - '/[\\\]{2}/' => '~LINE:break~', - - // Plugins - '/~~DISCUSSION~~/' => '', // @TODO - '/~~INFO:syntaxplugins~~/' => '', // @TODO - - // Final corrections - '/[\n\r]+[.,;:]+/' => PHP_EOL - ]; - - public function __construct(?array $rules = null) - { - if ($rules) - { - $this->_rule = $rules; - } - } - - // Macros operations - public function getMacroses(): array - { - $this->_macros; - } - - public function setMacroses(array $macros) - { - $this->_macros = $macros; - } - - public function getMacros(string $key, string $value): ?string - { - $this->_macros[$key] = isset($this->_macros[$key]) ? $value : null; - } - - public function setMacros(string $key, ?string $value): void - { - if ($value) - { - $this->_macros[$key] = $value; - } - - else - { - unset( - $this->_macros[$key] - ); - } - } - - // Rule operations - public function getRules(): array - { - $this->_rule; - } - - public function setRules(array $rules) - { - $this->_rule = $rules; - } - - public function getRule(string $key, string $value): ?string - { - $this->_rule[$key] = isset($this->_rule[$key]) ? $value : null; - } - - public function setRule(string $key, ?string $value): void - { - if ($value) - { - $this->_rule[$key] = $value; - } - - else - { - unset( - $this->_rule[$key] - ); - } - } - - // Convert DokuWiki text to Gemini - public function toGemini(?string $data, ?array &$lines = []): ?string - { - if (empty($data)) - { - return null; - } - - $raw = false; - - $lines = []; - - foreach ((array) explode(PHP_EOL, $data) as $line) - { - // Skip any formatting in lines between code tag - if (!$raw && preg_match('/<(code|file)([^>]*)>/i', $line, $matches)) - { - // Prepend tag meta or filename as plain description - if (!empty($matches[0])) - { - $lines[] = preg_replace( - '/<(code|file)[\s-]*([^>]*)>/i', - '$2', - $matches[0] - ); - } - - $lines[] = '```'; - $lines[] = preg_replace( - '/<\/?(code|file)[^>]*>/i', - '', - $line - ); - - $raw = true; - - // Make sure inline tag closed - if (preg_match('/<\/(code|file)>/i', $line)) - { - $lines[] = '```'; - - $raw = false; - - continue; - } - - continue; - } - - if ($raw && preg_match('/<\/(code|file)>/i', $line)) - { - $lines[] = preg_replace( - '/<\/(code|file)>/i', - '', - $line - ); - - $lines[] = '```'; - - $raw = false; - - continue; - } - - if ($raw) - { - $lines[] = preg_replace( - '/^```/', - ' ```', - $line - ); - - continue; - } - - // Apply config - $lines[] = preg_replace( - array_keys( - $this->_rule - ), - array_values( - $this->_rule - ), - strip_tags( - $line - ) - ); - } - - // ASCII table - $table = false; - - $rows = []; - - $th = []; - - foreach ($lines as $index => $line) - { - // Strip line breaks - $line = str_replace( - '~LINE:break~', - ' ', - $line - ); - - // Header - if (!$table && preg_match_all('/\^([^\^]+)/', $line, $matches)) - { - if (!empty($matches[1])) - { - $table = true; - - $rows = []; - - $th = []; - - foreach ($matches[1] as $value) - { - $th[] = trim( - $value - ); - } - - unset( - $lines[$index] - ); - - continue; - } - } - - // Body - if ($table) - { - $table = false; - - if (preg_match(sprintf('/%s\|/', str_repeat('\|(.*)', count($th))), $line, $matches)) - { - if (count($matches) == count($th) + 1) - { - $table = true; - - $row = []; - foreach ($th as $offset => $column) - { - $row[$column] = trim( - $matches[$offset + 1] - ); - } - - $rows[] = $row; - - unset( - $lines[$index] - ); - } - } - - if (!$table && $rows) - { - $builder = new ArrayToTextTable( - $rows - ); - - $lines[$index] = '```' . PHP_EOL . $builder->render() . PHP_EOL . '```'; - } - } - } - - // Merge lines - return preg_replace( - '/[\n\r]{2,}/', - PHP_EOL . PHP_EOL, - str_replace( - array_keys( - $this->_macros - ), - array_values( - $this->_macros - ), - implode( - PHP_EOL, - $lines - ) - ) - ); - } - - public function getH1(?string $gemini, ?string $regex = '/^[\s]?#([^#]+)/'): ?string - { - foreach ((array) explode(PHP_EOL, (string) $gemini) as $line) - { - preg_match( - $regex, - $line, - $matches - ); - - if (!empty($matches[1])) - { - return trim( - $matches[1] - ); - - break; - } - } - - return null; - } - - public function getLinks(?string $gemini, ?string $regex = '/(https?|gemini):\/\/\S+/'): array - { - $links = []; - - if (empty($gemini)) - { - return $links; - } - - preg_match_all( - $regex, - $gemini, - $matches - ); - - if (!empty($matches[0])) - { - foreach ((array) $matches[0] as $link) - { - $links[] = trim( - $link - ); - } - } - - return array_unique( - $links - ); - } -} \ No newline at end of file