mirror of
https://github.com/YGGverse/YGGtracker.git
synced 2026-03-31 17:15:38 +00:00
implement transliteration word forms in search #33
This commit is contained in:
parent
c7c5d7340c
commit
997666ab8e
8 changed files with 144 additions and 58 deletions
|
|
@ -229,8 +229,6 @@ class TorrentController extends AbstractController
|
|||
$activityService
|
||||
);
|
||||
|
||||
//
|
||||
|
||||
// Init request
|
||||
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
|
||||
$page = $request->get('page') ? (int) $request->get('page') : 1;
|
||||
|
|
@ -883,13 +881,12 @@ class TorrentController extends AbstractController
|
|||
|
||||
$file->getPathName(),
|
||||
|
||||
(bool) $this->getParameter('app.index.torrent.name'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
||||
(bool) $this->getParameter('app.index.torrent.source'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
||||
(bool) $this->getParameter('app.index.transliteration'),
|
||||
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||
(int) $this->getParameter('app.index.word.length.min'),
|
||||
(int) $this->getParameter('app.index.word.length.max'),
|
||||
|
||||
|
|
@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController
|
|||
{
|
||||
// Reindex keywords
|
||||
$torrentService->reindexTorrentKeywordsAll(
|
||||
(bool) $this->getParameter('app.index.torrent.name'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
||||
(bool) $this->getParameter('app.index.torrent.source'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
||||
(bool) $this->getParameter('app.index.transliteration'),
|
||||
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||
(int) $this->getParameter('app.index.word.length.min'),
|
||||
(int) $this->getParameter('app.index.word.length.max')
|
||||
);
|
||||
|
|
|
|||
|
|
@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository
|
|||
int $userId,
|
||||
array $keywords,
|
||||
array $locales,
|
||||
?bool $sensitive = null,
|
||||
?bool $approved = null,
|
||||
?bool $status = null,
|
||||
?bool $sensitive = null,
|
||||
?bool $approved = null,
|
||||
?bool $status = null
|
||||
): \Doctrine\ORM\QueryBuilder
|
||||
{
|
||||
$query = $this->createQueryBuilder('t');
|
||||
|
||||
if ($keywords)
|
||||
{
|
||||
$andKeywords = $query->expr()->andX();
|
||||
|
||||
foreach ($keywords as $i => $keyword)
|
||||
{
|
||||
$keyword = mb_strtolower($keyword); // all keywords stored in lowercase
|
||||
// Make query to the index case insensitive
|
||||
$keyword = mb_strtolower($keyword);
|
||||
|
||||
$andKeywords->add("t.keywords LIKE :keyword{$i}");
|
||||
// Init OR condition for each word form
|
||||
$orKeywords = $query->expr()->orX();
|
||||
|
||||
$orKeywords->add("t.keywords LIKE :keyword{$i}");
|
||||
$query->setParameter(":keyword{$i}", "%{$keyword}%");
|
||||
}
|
||||
|
||||
$query->andWhere($andKeywords);
|
||||
// Generate word forms for each transliteration locale #33
|
||||
foreach ($this->generateWordForms($keyword) as $j => $wordForm)
|
||||
{
|
||||
$orKeywords->add("t.keywords LIKE :keyword{$i}{$j}");
|
||||
$query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%");
|
||||
}
|
||||
|
||||
// Append AND condition
|
||||
$query->andWhere($orKeywords);
|
||||
}
|
||||
}
|
||||
|
||||
if ($locales)
|
||||
|
|
@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository
|
|||
|
||||
return $query;
|
||||
}
|
||||
|
||||
// Word forms generator to improve search results
|
||||
// e.g. transliteration rules for latin filenames
|
||||
/**
 * Build alternative spellings ("word forms") of a search keyword.
 *
 * The keyword is transliterated once per locale listed in $transliteration,
 * then every $charForms substitution is applied to the keyword itself and
 * to each transliterated form.
 *
 * The result never contains:
 *  - empty strings: the caller wraps each form in `%...%` for a LIKE
 *    condition, so an empty form would match every row
 *  - the keyword itself: the caller already binds it as its own condition
 *
 * NOTE(review): the caller builds parameter names by concatenating its
 * keyword index with the keys returned here (`:keyword{$i}{$j}`), which can
 * collide with `:keyword{$i}` of another keyword (e.g. 1 + 0 vs 10) -
 * consider a separator on the caller side.
 *
 * @param string                $keyword         Search keyword (the caller passes it lowercased)
 * @param string[]              $transliteration Locale codes passed to Translit::convert()
 * @param array<string, string> $charForms       Substring replacements, search => replacement
 *
 * @return string[] Unique word forms, re-indexed from 0
 */
private function generateWordForms(
    string $keyword,
    // #33 supported locales:
    // https://github.com/ashtokalo/php-translit
    array $transliteration = [
        'be',
        'bg',
        'el',
        'hy',
        'kk',
        'mk',
        'ru',
        'ka',
        'uk'
    ],
    // Additional char forms
    array $charForms =
    [
        'c' => 'k',
        'k' => 'c',
    ]
): array
{
    // Start from the keyword itself, so char forms still apply
    // when no transliteration locale is given
    $baseForms = [
        $keyword
    ];

    // Apply transliteration
    if ($transliteration)
    {
        // Resolve the converter once instead of on every iteration
        $translit = \ashtokalo\translit\Translit::object();

        foreach ($transliteration as $locale)
        {
            $baseForms[] = (string) $translit->convert(
                $keyword,
                $locale
            );
        }
    }

    // Most locales return the same string for a given keyword,
    // collapse them before the char forms pass
    $baseForms = array_unique(
        $baseForms
    );

    $wordForms = $baseForms;

    // Apply char forms
    foreach ($baseForms as $baseForm)
    {
        foreach ($charForms as $from => $to)
        {
            $wordForms[] = str_replace(
                (string) $from,
                (string) $to,
                $baseForm
            );
        }
    }

    // Drop useless forms, remove duplicates and re-index the list
    return array_values(
        array_unique(
            array_filter(
                $wordForms,
                static fn (string $wordForm): bool => $wordForm !== '' && $wordForm !== $keyword
            )
        )
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ class TorrentService
|
|||
|
||||
public function generateTorrentKeywordsByString(
|
||||
string $string,
|
||||
bool $transliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax,
|
||||
): array
|
||||
|
|
@ -97,11 +96,6 @@ class TorrentService
|
|||
{
|
||||
// Apply case insensitive search conversion
|
||||
$words[$key] = mb_strtolower($value);
|
||||
|
||||
if ($transliteration)
|
||||
{
|
||||
// @TODO
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -129,7 +123,6 @@ class TorrentService
|
|||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax
|
||||
|
||||
|
|
@ -147,7 +140,6 @@ class TorrentService
|
|||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$name,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
|
@ -163,7 +155,6 @@ class TorrentService
|
|||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$list['path'],
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
|
@ -179,7 +170,6 @@ class TorrentService
|
|||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$source,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
|
@ -195,7 +185,6 @@ class TorrentService
|
|||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$comment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
|
@ -301,7 +290,6 @@ class TorrentService
|
|||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax,
|
||||
|
||||
|
|
@ -326,7 +314,6 @@ class TorrentService
|
|||
$extractInfoHashV2,
|
||||
$extractSource,
|
||||
$extractComment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
),
|
||||
|
|
@ -623,7 +610,6 @@ class TorrentService
|
|||
bool $extractInfoHashV2,
|
||||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax
|
||||
): void
|
||||
|
|
@ -643,7 +629,6 @@ class TorrentService
|
|||
$extractInfoHashV2,
|
||||
$extractSource,
|
||||
$extractComment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue