mirror of
https://github.com/YGGverse/Yo.git
synced 2026-03-31 09:45:30 +00:00
save document body text to index
This commit is contained in:
parent
1f27a7e105
commit
3884f375d4
2 changed files with 43 additions and 0 deletions
|
|
@ -335,6 +335,45 @@ foreach($index->search('')
|
|||
$data['keywords'] = implode(',', $keywords);
|
||||
}
|
||||
|
||||
// Save document body text to index.
//
// Takes the inner HTML of every node matching `html > body` and reduces it
// to plain, single-spaced text stored in $data['body']. If several nodes
// match, the last non-empty one wins; if none yields content, $data['body']
// is left untouched.
//
// Processing order (innermost call first):
//   1. remove <script>/<style> elements together with their content;
//   2. pad tag brackets and line ends with spaces so that words from adjacent
//      elements do not get glued together once the tags are gone;
//   3. strip the remaining tags;
//   4. decode HTML entities - done AFTER strip_tags() so that escaped literal
//      text such as "&lt;b&gt;" stays in the index instead of being turned
//      into a real tag and removed;
//   5. collapse whitespace runs into a single space and trim.
foreach ($crawler->filter('html > body')->each(function($node) {

  return $node->html();

}) as $value)
{
  if (!empty($value))
  {
    $data['body'] = trim(
      preg_replace(
        '/[\s]{2,}/', // collapse runs of separators
        ' ',
        html_entity_decode( // decode last, so escaped "<...>" text is not mistaken for markup
          strip_tags(
            str_replace( // insert text separators around tags and line ends before stripping
              [
                '<',
                '>',
                PHP_EOL,
              ],
              [
                ' <',
                '> ',
                PHP_EOL . ' ',
              ],
              preg_replace(
                // Strip js / css blocks with their content. The lazy `.*?` (with /s)
                // also matches content containing "<", e.g. `if (a < b)` or HTML
                // inside a JS string, which a `[^<]*` body would fail to match.
                '/<(script|style)\b[^>]*>.*?<\/\1\s*>/is',
                '',
                $value
              )
            )
          )
        )
      )
    );
  }
}
|
||||
|
||||
// Crawl documents
|
||||
$documents = [];
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,10 @@ $result = $index->create(
|
|||
[
|
||||
'type' => 'text'
|
||||
],
|
||||
'body' =>
|
||||
[
|
||||
'type' => 'text'
|
||||
],
|
||||
'mime' =>
|
||||
[
|
||||
'type' => 'text'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue