add crc32url filter

This commit is contained in:
ghost 2023-11-25 18:10:23 +02:00
parent 8a827bfcdf
commit dfb2c06738
4 changed files with 44 additions and 25 deletions

View file

@ -23,8 +23,13 @@ $index = $client->index(
$config->manticore->index->document->name
);
// Prepare URL
$url = trim($argv[1]);
$crc32url = crc32($url);
// Check whether the URL already exists in the index
$result = $index->search('@url "' . trim($argv[1]) . '"')
$result = $index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1)
->get();
@ -32,7 +37,7 @@ if ($result->getTotal())
{
echo sprintf(
'URL "%s" already exists in "%s" index!' . PHP_EOL,
$argv[1],
$url,
$config->manticore->index->document->name
);
@ -42,13 +47,14 @@ if ($result->getTotal())
// Add
$result = $index->addDocument(
[
'url' => trim($argv[1])
'url' => $url,
'crc32url' => $crc32url
]
);
echo sprintf(
'URL "%s" added to "%s" index: %s' . PHP_EOL,
$argv[1],
$url,
$config->manticore->index->document->name,
print_r(
$result,

View file

@ -252,16 +252,19 @@ foreach($search->get() as $document)
{
foreach (array_unique($documents) as $url)
{
$url = trim($url);
$url = trim($url);
$crc32url = crc32($url);
if (!$index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1)
->get()
->getTotal())
{
$index->addDocument(
[
'url' => $url
'url' => $url,
'crc32url' => $crc32url
]
);