mirror of
https://github.com/YGGverse/Yo.git
synced 2026-03-31 17:55:35 +00:00
add crc32url filter
This commit is contained in:
parent
8a827bfcdf
commit
dfb2c06738
4 changed files with 44 additions and 25 deletions
|
|
@ -23,8 +23,13 @@ $index = $client->index(
|
||||||
$config->manticore->index->document->name
|
$config->manticore->index->document->name
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Prepare URL
|
||||||
|
$url = trim($argv[1]);
|
||||||
|
$crc32url = crc32($url);
|
||||||
|
|
||||||
// Check URL for exist
|
// Check URL for exist
|
||||||
$result = $index->search('@url "' . trim($argv[1]) . '"')
|
$result = $index->search('@url "' . $url . '"')
|
||||||
|
->filter('crc32url', $crc32url)
|
||||||
->limit(1)
|
->limit(1)
|
||||||
->get();
|
->get();
|
||||||
|
|
||||||
|
|
@ -32,7 +37,7 @@ if ($result->getTotal())
|
||||||
{
|
{
|
||||||
echo sprintf(
|
echo sprintf(
|
||||||
'URL "%s" already exists in "%s" index!' . PHP_EOL,
|
'URL "%s" already exists in "%s" index!' . PHP_EOL,
|
||||||
$argv[1],
|
$url,
|
||||||
$config->manticore->index->document->name
|
$config->manticore->index->document->name
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -42,13 +47,14 @@ if ($result->getTotal())
|
||||||
// Add
|
// Add
|
||||||
$result = $index->addDocument(
|
$result = $index->addDocument(
|
||||||
[
|
[
|
||||||
'url' => trim($argv[1])
|
'url' => $url,
|
||||||
|
'crc32url' => $crc32url
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
echo sprintf(
|
echo sprintf(
|
||||||
'URL "%s" added to "%s" index: %s' . PHP_EOL,
|
'URL "%s" added to "%s" index: %s' . PHP_EOL,
|
||||||
$argv[1],
|
$url,
|
||||||
$config->manticore->index->document->name,
|
$config->manticore->index->document->name,
|
||||||
print_r(
|
print_r(
|
||||||
$result,
|
$result,
|
||||||
|
|
|
||||||
|
|
@ -253,15 +253,18 @@ foreach($search->get() as $document)
|
||||||
foreach (array_unique($documents) as $url)
|
foreach (array_unique($documents) as $url)
|
||||||
{
|
{
|
||||||
$url = trim($url);
|
$url = trim($url);
|
||||||
|
$crc32url = crc32($url);
|
||||||
|
|
||||||
if (!$index->search('@url "' . $url . '"')
|
if (!$index->search('@url "' . $url . '"')
|
||||||
|
->filter('crc32url', $crc32url)
|
||||||
->limit(1)
|
->limit(1)
|
||||||
->get()
|
->get()
|
||||||
->getTotal())
|
->getTotal())
|
||||||
{
|
{
|
||||||
$index->addDocument(
|
$index->addDocument(
|
||||||
[
|
[
|
||||||
'url' => $url
|
'url' => $url,
|
||||||
|
'crc32url' => $crc32url
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,10 @@ $result = $index->create(
|
||||||
'time' =>
|
'time' =>
|
||||||
[
|
[
|
||||||
'type' => 'integer'
|
'type' => 'integer'
|
||||||
|
],
|
||||||
|
'crc32url' =>
|
||||||
|
[
|
||||||
|
'type' => 'bigint'
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
(array) $config->manticore->index->document->settings
|
(array) $config->manticore->index->document->settings
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ $placeholder = plural(
|
||||||
$response = false;
|
$response = false;
|
||||||
|
|
||||||
// Request
|
// Request
|
||||||
$q = !empty($_GET['q']) ? $_GET['q'] : '';
|
$q = !empty($_GET['q']) ? trim($_GET['q']) : '';
|
||||||
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
|
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
|
||||||
|
|
||||||
// Register new URL by request on enabled
|
// Register new URL by request on enabled
|
||||||
|
|
@ -77,10 +77,13 @@ if ($config->webui->search->index->request->url->enabled)
|
||||||
{
|
{
|
||||||
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match($config->webui->search->index->request->url->regex, $q))
|
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match($config->webui->search->index->request->url->regex, $q))
|
||||||
{
|
{
|
||||||
$url = trim($q);
|
// Prepare URL
|
||||||
|
$url = $q;
|
||||||
|
$crc32url = crc32($url);
|
||||||
|
|
||||||
// Check URL for exist
|
// Check URL for exist
|
||||||
$exist = $index->search('@url "' . trim($url) . '"')
|
$exist = $index->search('@url "' . $url . '"')
|
||||||
|
->filter('crc32url', $crc32url)
|
||||||
->limit(1)
|
->limit(1)
|
||||||
->get()
|
->get()
|
||||||
->getTotal();
|
->getTotal();
|
||||||
|
|
@ -90,7 +93,7 @@ if ($config->webui->search->index->request->url->enabled)
|
||||||
/* disable as regular search request possible
|
/* disable as regular search request possible
|
||||||
$response = sprintf(
|
$response = sprintf(
|
||||||
_('URL "%s" exists in search index'),
|
_('URL "%s" exists in search index'),
|
||||||
htmlentities($url)
|
htmlentities($q)
|
||||||
);
|
);
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
@ -98,36 +101,39 @@ if ($config->webui->search->index->request->url->enabled)
|
||||||
// Add URL
|
// Add URL
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// @TODO check http code
|
||||||
|
|
||||||
$index->addDocument(
|
$index->addDocument(
|
||||||
[
|
[
|
||||||
'url' => trim($url)
|
'url' => $url,
|
||||||
|
'crc32url' => $crc32url
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
$response = sprintf(
|
$response = sprintf(
|
||||||
_('URL "%s" added to the crawl queue!'),
|
_('URL "%s" added to the crawl queue!'),
|
||||||
htmlentities($url)
|
htmlentities($q)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extended syntax corrections
|
// Extended syntax corrections
|
||||||
$query = trim($q);
|
switch (true)
|
||||||
|
|
||||||
if (filter_var($q, FILTER_VALIDATE_URL))
|
|
||||||
{
|
{
|
||||||
$query = '@url "' . $q . '"';
|
case filter_var($q, FILTER_VALIDATE_URL):
|
||||||
}
|
|
||||||
|
|
||||||
elseif (false === strpos($q, '"'))
|
$query = $index->search('@url "' . $q . '"')->filter('crc32url', crc32($q));
|
||||||
{
|
|
||||||
$query = '"' . $q . '"';
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
|
||||||
|
$query = $index->search($q);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search request begin
|
// Search request begin
|
||||||
$results = $index->search($query)
|
$results = $query->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
|
||||||
->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
|
|
||||||
->limit($config->webui->pagination->limit)
|
->limit($config->webui->pagination->limit)
|
||||||
->get();
|
->get();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue