Update lexer/extractor to handle banned hashtags

This commit is contained in:
Daniel Supernault 2022-12-27 05:23:54 -07:00
parent aed8c865d8
commit 909a8a5a9b
No known key found for this signature in database
GPG key ID: 0DEF1C662C9033F7
3 changed files with 27 additions and 3 deletions

View file

@ -15,6 +15,7 @@ use App\Mention;
use App\Services\AccountService;
use App\Hashtag;
use App\StatusHashtag;
use App\Services\TrendingHashtagService;
class StatusTagsPipeline implements ShouldQueue
{
@ -61,6 +62,14 @@ class StatusTagsPipeline implements ShouldQueue
$name = substr($tag['name'], 0, 1) == '#' ?
substr($tag['name'], 1) : $tag['name'];
// Skip any tag whose name matches a banned hashtag name, ignoring case.
// mb_strtolower is used instead of strtolower so that non-ASCII letters are
// case-folded too, and in_array runs in strict mode so numeric-looking names
// (e.g. "1e3" vs "1000") are never treated as equal by loose comparison.
$banned = TrendingHashtagService::getBannedHashtagNames();
if (count($banned)) {
    $bannedLower = array_map('mb_strtolower', $banned);
    if (in_array(mb_strtolower($name), $bannedLower, true)) {
        continue;
    }
}
$hashtag = Hashtag::firstOrCreate([
'slug' => str_slug($name)
], [

View file

@ -16,13 +16,20 @@ class TrendingHashtagService
return self::CACHE_KEY . $k;
}
public static function getBlockedHashtags()
/**
 * Get the ids of all hashtags flagged as banned.
 *
 * The id list is cached under the ':is_banned' suffix of this service's
 * cache key with a TTL of 1209600 (14 days if the cache TTL is in seconds).
 *
 * @return array Hashtag ids, as produced by pluck('id')->toArray()
 */
public static function getBannedHashtags()
{
    $cacheKey = self::key(':is_banned');
    $ttl = 1209600;

    return Cache::remember($cacheKey, $ttl, function () {
        $bannedIds = Hashtag::whereIsBanned(true)->pluck('id');

        return $bannedIds->toArray();
    });
}
/**
 * Get the names of all banned hashtags.
 *
 * The ids come from getBannedHashtags() and are resolved to their 'name'
 * values. The resulting list is cached separately under the
 * ':is_banned:names' suffix with a TTL of 1209600 (14 days if the cache
 * TTL is in seconds).
 *
 * @return array Hashtag names, as produced by pluck('name')->toArray()
 */
public static function getBannedHashtagNames()
{
    $cacheKey = self::key(':is_banned:names');
    $ttl = 1209600;

    return Cache::remember($cacheKey, $ttl, function () {
        // The id lookup happens only when the names cache has to be rebuilt.
        $bannedIds = self::getBannedHashtags();
        $bannedTags = Hashtag::find($bannedIds);

        return $bannedTags->pluck('name')->toArray();
    });
}
public static function getNonTrendingHashtags()
{
return Cache::remember(self::key(':can_trend'), 1209600, function() {
@ -52,7 +59,7 @@ class TrendingHashtagService
{
$minId = self::getMinRecentId();
$skipIds = array_merge(self::getBlockedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags());
$skipIds = array_merge(self::getBannedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags());
return Cache::remember(self::CACHE_KEY, config('trending.hashtags.ttl'), function() use($minId, $skipIds) {
return StatusHashtag::select('hashtag_id', \DB::raw('count(*) as total'))

View file

@ -12,6 +12,7 @@ namespace App\Util\Lexer;
use Illuminate\Support\Str;
use App\Status;
use App\Services\AutolinkService;
use App\Services\TrendingHashtagService;
/**
* Twitter Extractor Class.
@ -267,6 +268,8 @@ class Extractor extends Regex
return [];
}
$bannedTags = TrendingHashtagService::getBannedHashtagNames();
preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
$tags = [];
@ -278,7 +281,12 @@ class Extractor extends Regex
if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
continue;
}
if(mb_strlen($hashtag[0]) > 124) {
// Drop the match when it equals a banned hashtag name, ignoring case.
// mb_strtolower case-folds non-ASCII letters as well (strtolower does not),
// and strict in_array prevents loose numeric-string matches such as
// "1e3" being treated as equal to "1000".
if (count($bannedTags)) {
    if (in_array(mb_strtolower($hashtag[0]), array_map('mb_strtolower', $bannedTags), true)) {
        continue;
    }
}
if (mb_strlen($hashtag[0]) > 124) {
continue;
}
$tags[] = [