From 2257ce771fb300fcf065862f519c96388ba8c484 Mon Sep 17 00:00:00 2001
From: yggverse
Date: Wed, 20 Mar 2024 20:18:55 +0200
Subject: [PATCH] apply cleaner to the current url configuration

---
 src/cli/document/clean.php | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/cli/document/clean.php b/src/cli/document/clean.php
index a2e86b8..a5866b2 100644
--- a/src/cli/document/clean.php
+++ b/src/cli/document/clean.php
@@ -36,9 +36,40 @@ $index = $client->index(
     $config->manticore->index->document->name
 );
 
+// Apply new configuration rules
+echo _('apply new configuration rules...') . PHP_EOL;
+
+foreach ($config->cli->document->crawl->skip->stripos->url as $condition)
+{
+    echo sprintf(
+        _('cleanup documents with url that contain substring "%s"...') . PHP_EOL,
+        $condition
+    );
+
+    $query = new \Manticoresearch\Query();
+
+    $query->add(
+        'url',
+        @\Manticoresearch\Utils::escape(
+            $condition
+        )
+    );
+
+    $result = $index->deleteDocuments(
+        $query
+    );
+
+    echo sprintf(
+        _('documents deleted: %d') . PHP_EOL,
+        $result['deleted']
+    );
+}
+
+echo _('new configuration rules apply completed.') . PHP_EOL;
+
 // Optimize indexes
-echo _('indexes optimization begin') . PHP_EOL;
+echo _('indexes optimization begin...') . PHP_EOL;
 
 $index->optimize();
 
-echo _('indexes optimization completed') . PHP_EOL;
\ No newline at end of file
+echo _('indexes optimization completed.') . PHP_EOL;
\ No newline at end of file