fix redirection entry offset in queue to prevent infinite crawl; optimize duplicated constructions

This commit is contained in:
yggverse 2025-11-11 20:11:48 +02:00
parent a8513f777e
commit ed21836f13

View file

@ -66,18 +66,16 @@ class Cli
} }
// Updates address in crawler queue // Updates address in crawler queue
public function setSource( public function putSource(
int $offset, int $position,
string $url string $url
): bool ): bool
{ {
// Validate given value and check it is unique in the pool if (in_array($url, $this->source))
if (isset($this->source[$offset]) && $this->_source($url) && $this->source[$offset] != $url)
{
$this->source[$offset] = $url;
return true; return true;
}
if ($this->_source($url))
return !array_splice($this->source, $position, 0, [$position => $url]);
return false; return false;
} }
@ -203,15 +201,8 @@ class Cli
// Validate redirection target location // Validate redirection target location
if (filter_var($response->getMeta(), FILTER_VALIDATE_URL)) // @TODO resolve relative locations if (filter_var($response->getMeta(), FILTER_VALIDATE_URL)) // @TODO resolve relative locations
{ {
// Apply redirection target to the current destination // Insert redirection target to the next destination
if ($this->setSource($offset, trim($response->getMeta()))) if (!$this->putSource($offset + 1, trim($response->getMeta())))
{
// Rescan current destination using updated location
$this->start(
$offset
);
}
else
{ {
print( print(
Message::red( Message::red(
@ -221,11 +212,6 @@ class Cli
) )
) )
); );
// Continue next location
$this->start(
$offset + 1
);
} }
} }
else else
@ -238,11 +224,6 @@ class Cli
) )
) )
); );
// Continue next location
$this->start(
$offset + 1
);
} }
} }
else else
@ -255,15 +236,15 @@ class Cli
) )
) )
); );
// Continue next location
$this->start(
$offset + 1
);
} }
// Continue next location
$this->start(
$offset + 1
);
} }
return; // panic @TODO return; // panic? @TODO
break; break;
default: // failure default: // failure