mirror of
https://github.com/YGGverse/Yo.git
synced 2026-03-31 09:45:30 +00:00
crawl newest pages by rand in queue
This commit is contained in:
parent
811c700049
commit
33cc778999
1 changed files with 10 additions and 26 deletions
|
|
@ -50,30 +50,6 @@ try {
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Init search
|
|
||||||
$search = new \Manticoresearch\Search(
|
|
||||||
$client
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->setIndex(
|
|
||||||
$config->manticore->index->document->name
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->match(
|
|
||||||
'*',
|
|
||||||
'url'
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->sort(
|
|
||||||
'time',
|
|
||||||
'asc'
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->limit(
|
|
||||||
$config->cli->document->crawl->queue->limit
|
|
||||||
);
|
|
||||||
|
|
||||||
// Init index
|
|
||||||
$index = $client->index(
|
$index = $client->index(
|
||||||
$config->manticore->index->document->name
|
$config->manticore->index->document->name
|
||||||
);
|
);
|
||||||
|
|
@ -105,8 +81,16 @@ if ($config->cli->document->crawl->debug->level->notice)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Begin queue
|
// Begin crawl queue
|
||||||
foreach($search->get() as $document)
|
// thanks to @manticoresearch for help with random feature implementation:
|
||||||
|
// https://github.com/manticoresoftware/manticoresearch-php/discussions/176
|
||||||
|
|
||||||
|
foreach($index->search('')
|
||||||
|
->expression('random', 'rand()')
|
||||||
|
->sort('time', 'asc')
|
||||||
|
->sort('random', 'asc')
|
||||||
|
->limit($config->cli->document->crawl->queue->limit)
|
||||||
|
->get() as $document)
|
||||||
{
|
{
|
||||||
// Define data
|
// Define data
|
||||||
$time = time();
|
$time = time();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue