From be7c63e68a9783000e5ee07a9f4379cee84f6a1b Mon Sep 17 00:00:00 2001 From: yggverse Date: Thu, 21 Mar 2024 18:58:30 +0200 Subject: [PATCH] make sure document contains exact substring in URL --- src/cli/document/clean.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/cli/document/clean.php b/src/cli/document/clean.php index 4b123ee..d76c347 100644 --- a/src/cli/document/clean.php +++ b/src/cli/document/clean.php @@ -83,12 +83,20 @@ foreach ($config->cli->document->crawl->skip->stripos->url as $condition) $index->search( sprintf( '@url "%s"', - $condition + @\Manticoresearch\Utils::escape( + $condition + ) ) )->limit( isset($argv[1]) ? (int) $argv[1] : 10 )->get() as $document) { + // Make sure document contains exact substring in URL + if (false === mb_strpos($document->get('url'), $condition)) + { + continue; + } + // Delete found document by it ID $result = $index->deleteDocument( $document->getId()