From ed21836f135384dee2b6025ba8e6aa80cc4684cd Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 11 Nov 2025 20:11:48 +0200 Subject: [PATCH] fix redirection entry offset in queue to prevent infinitive crawl; optimize duplicated constructions --- src/Controller/Cli.php | 47 +++++++++++++----------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/src/Controller/Cli.php b/src/Controller/Cli.php index e162e87..ccb01a5 100755 --- a/src/Controller/Cli.php +++ b/src/Controller/Cli.php @@ -66,18 +66,16 @@ class Cli } // Updates address in crawler queue - public function setSource( - int $offset, + public function putSource( + int $position, string $url ): bool { - // Validate given value and check it is unique in the pool - if (isset($this->source[$offset]) && $this->_source($url) && $this->source[$offset] != $url) - { - $this->source[$offset] = $url; - + if (in_array($url, $this->source)) return true; - } + + if ($this->_source($url)) + return !array_splice($this->source, $position, 0, [$position => $url]); return false; } @@ -203,15 +201,8 @@ class Cli // Validate redirection target location if (filter_var($response->getMeta(), FILTER_VALIDATE_URL)) // @TODO resolve relative locations { - // Apply redirection target to the current destination - if ($this->setSource($offset, trim($response->getMeta()))) - { - // Rescan current destination using updated location - $this->start( - $offset - ); - } - else + // Insert redirection target to the next destination + if (!$this->putSource($offset + 1, trim($response->getMeta()))) { print( Message::red( @@ -221,11 +212,6 @@ class Cli ) ) ); - - // Continue next location - $this->start( - $offset + 1 - ); } } else @@ -238,11 +224,6 @@ class Cli ) ) ); - - // Continue next location - $this->start( - $offset + 1 - ); } } else @@ -255,15 +236,15 @@ class Cli ) ) ); - - // Continue next location - $this->start( - $offset + 1 - ); } + + // Continue next location + $this->start( + $offset + 1 + ); } - return; // panic @TODO + return; // panic? @TODO break; default: // failure