mirror of
https://github.com/YGGverse/YGGtracker.git
synced 2026-04-01 17:45:31 +00:00
implement transliteration word forms in search #33
This commit is contained in:
parent
c7c5d7340c
commit
997666ab8e
8 changed files with 144 additions and 58 deletions
15
.env
15
.env
|
|
@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker
|
||||||
APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1
|
APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1
|
||||||
|
|
||||||
# Enable search index for torrent name
|
# Enable search index for torrent name
|
||||||
APP_INDEX_TORRENT_NAME=1
|
APP_INDEX_TORRENT_NAME_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for torrent info hash v1
|
# Enable search index for torrent info hash v1
|
||||||
APP_INDEX_TORRENT_HASH_V1=1
|
APP_INDEX_TORRENT_HASH_V1_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for torrent info hash v2
|
# Enable search index for torrent info hash v2
|
||||||
APP_INDEX_TORRENT_HASH_V2=1
|
APP_INDEX_TORRENT_HASH_V2_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for torrent filenames
|
# Enable search index for torrent filenames
|
||||||
APP_INDEX_TORRENT_FILENAMES=1
|
APP_INDEX_TORRENT_FILENAMES_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for torrent source
|
# Enable search index for torrent source
|
||||||
APP_INDEX_TORRENT_SOURCE=1
|
APP_INDEX_TORRENT_SOURCE_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for torrent comment
|
# Enable search index for torrent comment
|
||||||
APP_INDEX_TORRENT_COMMENT=1
|
APP_INDEX_TORRENT_COMMENT_ENABLED=1
|
||||||
|
|
||||||
# Enable search index for words length greater than N chars
|
# Enable search index for words length greater than N chars
|
||||||
APP_INDEX_WORD_LENGTH_MIN=3
|
APP_INDEX_WORD_LENGTH_MIN=3
|
||||||
|
|
||||||
# Enable search index for words length not greater than N chars
|
# Enable search index for words length not greater than N chars
|
||||||
APP_INDEX_WORD_LENGTH_MAX=255
|
APP_INDEX_WORD_LENGTH_MAX=255
|
||||||
|
|
||||||
# Enable search index transliteration @TODO
|
|
||||||
APP_INDEX_TRANSLITERATION=1
|
|
||||||
|
|
@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name
|
||||||
* [SVG icons](https://icons.getbootstrap.com)
|
* [SVG icons](https://icons.getbootstrap.com)
|
||||||
* [Scraper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer)
|
* [Scraper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer)
|
||||||
* [Bencode Library](https://github.com/Rhilip/Bencode)
|
* [Bencode Library](https://github.com/Rhilip/Bencode)
|
||||||
|
* [Transliteration Library](https://github.com/ashtokalo/php-translit)
|
||||||
* [Identicons](https://github.com/dmester/jdenticon-php)
|
* [Identicons](https://github.com/dmester/jdenticon-php)
|
||||||
|
|
||||||
#### Support
|
#### Support
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
"php": ">=8.1",
|
"php": ">=8.1",
|
||||||
"ext-ctype": "*",
|
"ext-ctype": "*",
|
||||||
"ext-iconv": "*",
|
"ext-iconv": "*",
|
||||||
|
"ashtokalo/php-translit": "^0.2.0",
|
||||||
"doctrine/annotations": "^2.0",
|
"doctrine/annotations": "^2.0",
|
||||||
"doctrine/doctrine-bundle": "^2.10",
|
"doctrine/doctrine-bundle": "^2.10",
|
||||||
"doctrine/doctrine-migrations-bundle": "^3.2",
|
"doctrine/doctrine-migrations-bundle": "^3.2",
|
||||||
|
|
|
||||||
45
composer.lock
generated
45
composer.lock
generated
|
|
@ -4,8 +4,51 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "3770ffcd80695bc10a22f8ece4f68d1f",
|
"content-hash": "4d930a43cf9a80e1622029c4a4048a6b",
|
||||||
"packages": [
|
"packages": [
|
||||||
|
{
|
||||||
|
"name": "ashtokalo/php-translit",
|
||||||
|
"version": "0.2.0",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/ashtokalo/php-translit.git",
|
||||||
|
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/ashtokalo/php-translit/zipball/8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
|
||||||
|
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"php": ">=7.0"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"phpunit/phpunit": "~7.0"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"ashtokalo\\translit\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"description": "PHP library to convert text from one script to another.",
|
||||||
|
"keywords": [
|
||||||
|
"latinization",
|
||||||
|
"romanization",
|
||||||
|
"translit",
|
||||||
|
"transliteration"
|
||||||
|
],
|
||||||
|
"support": {
|
||||||
|
"issues": "https://github.com/ashtokalo/php-translit/issues",
|
||||||
|
"source": "https://github.com/ashtokalo/php-translit/tree/0.2.0"
|
||||||
|
},
|
||||||
|
"time": "2022-09-26T09:05:24+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "doctrine/annotations",
|
"name": "doctrine/annotations",
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
|
|
|
||||||
|
|
@ -21,15 +21,14 @@ parameters:
|
||||||
app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%'
|
app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%'
|
||||||
app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%'
|
app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%'
|
||||||
app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%'
|
app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%'
|
||||||
app.index.torrent.name: '%env(APP_INDEX_TORRENT_NAME)%'
|
app.index.torrent.name.enabled: '%env(APP_INDEX_TORRENT_NAME_ENABLED)%'
|
||||||
app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%'
|
app.index.torrent.filenames.enabled: '%env(APP_INDEX_TORRENT_FILENAMES_ENABLED)%'
|
||||||
app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%'
|
app.index.torrent.hash.v1.enabled: '%env(APP_INDEX_TORRENT_HASH_V1_ENABLED)%'
|
||||||
app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%'
|
app.index.torrent.hash.v2.enabled: '%env(APP_INDEX_TORRENT_HASH_V2_ENABLED)%'
|
||||||
app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%'
|
app.index.torrent.source.enabled: '%env(APP_INDEX_TORRENT_SOURCE_ENABLED)%'
|
||||||
app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%'
|
app.index.torrent.comment.enabled: '%env(APP_INDEX_TORRENT_COMMENT_ENABLED)%'
|
||||||
app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%'
|
app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%'
|
||||||
app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%'
|
app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%'
|
||||||
app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%'
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
# default configuration for services in *this* file
|
# default configuration for services in *this* file
|
||||||
|
|
|
||||||
|
|
@ -229,8 +229,6 @@ class TorrentController extends AbstractController
|
||||||
$activityService
|
$activityService
|
||||||
);
|
);
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
// Init request
|
// Init request
|
||||||
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
|
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
|
||||||
$page = $request->get('page') ? (int) $request->get('page') : 1;
|
$page = $request->get('page') ? (int) $request->get('page') : 1;
|
||||||
|
|
@ -883,13 +881,12 @@ class TorrentController extends AbstractController
|
||||||
|
|
||||||
$file->getPathName(),
|
$file->getPathName(),
|
||||||
|
|
||||||
(bool) $this->getParameter('app.index.torrent.name'),
|
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.source'),
|
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||||
(bool) $this->getParameter('app.index.transliteration'),
|
|
||||||
(int) $this->getParameter('app.index.word.length.min'),
|
(int) $this->getParameter('app.index.word.length.min'),
|
||||||
(int) $this->getParameter('app.index.word.length.max'),
|
(int) $this->getParameter('app.index.word.length.max'),
|
||||||
|
|
||||||
|
|
@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController
|
||||||
{
|
{
|
||||||
// Reindex keywords
|
// Reindex keywords
|
||||||
$torrentService->reindexTorrentKeywordsAll(
|
$torrentService->reindexTorrentKeywordsAll(
|
||||||
(bool) $this->getParameter('app.index.torrent.name'),
|
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.source'),
|
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||||
(bool) $this->getParameter('app.index.transliteration'),
|
|
||||||
(int) $this->getParameter('app.index.word.length.min'),
|
(int) $this->getParameter('app.index.word.length.min'),
|
||||||
(int) $this->getParameter('app.index.word.length.max')
|
(int) $this->getParameter('app.index.word.length.max')
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository
|
||||||
int $userId,
|
int $userId,
|
||||||
array $keywords,
|
array $keywords,
|
||||||
array $locales,
|
array $locales,
|
||||||
?bool $sensitive = null,
|
?bool $sensitive = null,
|
||||||
?bool $approved = null,
|
?bool $approved = null,
|
||||||
?bool $status = null,
|
?bool $status = null
|
||||||
): \Doctrine\ORM\QueryBuilder
|
): \Doctrine\ORM\QueryBuilder
|
||||||
{
|
{
|
||||||
$query = $this->createQueryBuilder('t');
|
$query = $this->createQueryBuilder('t');
|
||||||
|
|
||||||
if ($keywords)
|
if ($keywords)
|
||||||
{
|
{
|
||||||
$andKeywords = $query->expr()->andX();
|
|
||||||
|
|
||||||
foreach ($keywords as $i => $keyword)
|
foreach ($keywords as $i => $keyword)
|
||||||
{
|
{
|
||||||
$keyword = mb_strtolower($keyword); // all keywords stored in lowercase
|
// Make query to the index case insensitive
|
||||||
|
$keyword = mb_strtolower($keyword);
|
||||||
|
|
||||||
$andKeywords->add("t.keywords LIKE :keyword{$i}");
|
// Init OR condition for each word form
|
||||||
|
$orKeywords = $query->expr()->orX();
|
||||||
|
|
||||||
|
$orKeywords->add("t.keywords LIKE :keyword{$i}");
|
||||||
$query->setParameter(":keyword{$i}", "%{$keyword}%");
|
$query->setParameter(":keyword{$i}", "%{$keyword}%");
|
||||||
}
|
|
||||||
|
|
||||||
$query->andWhere($andKeywords);
|
// Generate word forms for each transliteration locale #33
|
||||||
|
foreach ($this->generateWordForms($keyword) as $j => $wordForm)
|
||||||
|
{
|
||||||
|
$orKeywords->add("t.keywords LIKE :keyword{$i}{$j}");
|
||||||
|
$query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append AND condition
|
||||||
|
$query->andWhere($orKeywords);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($locales)
|
if ($locales)
|
||||||
|
|
@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository
|
||||||
|
|
||||||
return $query;
|
return $query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Word forms generator to improve search results
|
||||||
|
// e.g. transliteration rules for latin filenames
|
||||||
|
private function generateWordForms(
|
||||||
|
string $keyword,
|
||||||
|
// #33 supported locales:
|
||||||
|
// https://github.com/ashtokalo/php-translit
|
||||||
|
array $transliteration = [
|
||||||
|
'be',
|
||||||
|
'bg',
|
||||||
|
'el',
|
||||||
|
'hy',
|
||||||
|
'kk',
|
||||||
|
'mk',
|
||||||
|
'ru',
|
||||||
|
'ka',
|
||||||
|
'uk'
|
||||||
|
],
|
||||||
|
// Additional char forms
|
||||||
|
array $charForms =
|
||||||
|
[
|
||||||
|
'c' => 'k',
|
||||||
|
'k' => 'c',
|
||||||
|
]
|
||||||
|
): array
|
||||||
|
{
|
||||||
|
$wordForms = [];
|
||||||
|
|
||||||
|
// Apply transliteration
|
||||||
|
foreach ($transliteration as $locale)
|
||||||
|
{
|
||||||
|
$wordForms[] = \ashtokalo\translit\Translit::object()->convert(
|
||||||
|
$keyword,
|
||||||
|
$locale
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply char forms
|
||||||
|
foreach ($wordForms as $wordForm)
|
||||||
|
{
|
||||||
|
foreach ($charForms as $from => $to)
|
||||||
|
{
|
||||||
|
$wordForms[] = str_replace(
|
||||||
|
$from,
|
||||||
|
$to,
|
||||||
|
$wordForm
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove duplicates
|
||||||
|
return array_unique(
|
||||||
|
$wordForms
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,6 @@ class TorrentService
|
||||||
|
|
||||||
public function generateTorrentKeywordsByString(
|
public function generateTorrentKeywordsByString(
|
||||||
string $string,
|
string $string,
|
||||||
bool $transliteration,
|
|
||||||
int $wordLengthMin,
|
int $wordLengthMin,
|
||||||
int $wordLengthMax,
|
int $wordLengthMax,
|
||||||
): array
|
): array
|
||||||
|
|
@ -97,11 +96,6 @@ class TorrentService
|
||||||
{
|
{
|
||||||
// Apply case insensitive search conversion
|
// Apply case insensitive search conversion
|
||||||
$words[$key] = mb_strtolower($value);
|
$words[$key] = mb_strtolower($value);
|
||||||
|
|
||||||
if ($transliteration)
|
|
||||||
{
|
|
||||||
// @TODO
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -129,7 +123,6 @@ class TorrentService
|
||||||
bool $extractSource,
|
bool $extractSource,
|
||||||
bool $extractComment,
|
bool $extractComment,
|
||||||
|
|
||||||
bool $wordTransliteration,
|
|
||||||
int $wordLengthMin,
|
int $wordLengthMin,
|
||||||
int $wordLengthMax
|
int $wordLengthMax
|
||||||
|
|
||||||
|
|
@ -147,7 +140,6 @@ class TorrentService
|
||||||
$keywords,
|
$keywords,
|
||||||
$this->generateTorrentKeywordsByString(
|
$this->generateTorrentKeywordsByString(
|
||||||
$name,
|
$name,
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
)
|
)
|
||||||
|
|
@ -163,7 +155,6 @@ class TorrentService
|
||||||
$keywords,
|
$keywords,
|
||||||
$this->generateTorrentKeywordsByString(
|
$this->generateTorrentKeywordsByString(
|
||||||
$list['path'],
|
$list['path'],
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
)
|
)
|
||||||
|
|
@ -179,7 +170,6 @@ class TorrentService
|
||||||
$keywords,
|
$keywords,
|
||||||
$this->generateTorrentKeywordsByString(
|
$this->generateTorrentKeywordsByString(
|
||||||
$source,
|
$source,
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
)
|
)
|
||||||
|
|
@ -195,7 +185,6 @@ class TorrentService
|
||||||
$keywords,
|
$keywords,
|
||||||
$this->generateTorrentKeywordsByString(
|
$this->generateTorrentKeywordsByString(
|
||||||
$comment,
|
$comment,
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
)
|
)
|
||||||
|
|
@ -301,7 +290,6 @@ class TorrentService
|
||||||
bool $extractSource,
|
bool $extractSource,
|
||||||
bool $extractComment,
|
bool $extractComment,
|
||||||
|
|
||||||
bool $wordTransliteration,
|
|
||||||
int $wordLengthMin,
|
int $wordLengthMin,
|
||||||
int $wordLengthMax,
|
int $wordLengthMax,
|
||||||
|
|
||||||
|
|
@ -326,7 +314,6 @@ class TorrentService
|
||||||
$extractInfoHashV2,
|
$extractInfoHashV2,
|
||||||
$extractSource,
|
$extractSource,
|
||||||
$extractComment,
|
$extractComment,
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
),
|
),
|
||||||
|
|
@ -623,7 +610,6 @@ class TorrentService
|
||||||
bool $extractInfoHashV2,
|
bool $extractInfoHashV2,
|
||||||
bool $extractSource,
|
bool $extractSource,
|
||||||
bool $extractComment,
|
bool $extractComment,
|
||||||
bool $wordTransliteration,
|
|
||||||
int $wordLengthMin,
|
int $wordLengthMin,
|
||||||
int $wordLengthMax
|
int $wordLengthMax
|
||||||
): void
|
): void
|
||||||
|
|
@ -643,7 +629,6 @@ class TorrentService
|
||||||
$extractInfoHashV2,
|
$extractInfoHashV2,
|
||||||
$extractSource,
|
$extractSource,
|
||||||
$extractComment,
|
$extractComment,
|
||||||
$wordTransliteration,
|
|
||||||
$wordLengthMin,
|
$wordLengthMin,
|
||||||
$wordLengthMax
|
$wordLengthMax
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue