Update lexer/extractor to handle banned hashtags

This commit is contained in:
Daniel Supernault 2022-12-27 05:23:54 -07:00
parent aed8c865d8
commit 909a8a5a9b
No known key found for this signature in database
GPG key ID: 0DEF1C662C9033F7
3 changed files with 27 additions and 3 deletions

View file

@ -15,6 +15,7 @@ use App\Mention;
use App\Services\AccountService; use App\Services\AccountService;
use App\Hashtag; use App\Hashtag;
use App\StatusHashtag; use App\StatusHashtag;
use App\Services\TrendingHashtagService;
class StatusTagsPipeline implements ShouldQueue class StatusTagsPipeline implements ShouldQueue
{ {
@ -61,6 +62,14 @@ class StatusTagsPipeline implements ShouldQueue
$name = substr($tag['name'], 0, 1) == '#' ? $name = substr($tag['name'], 0, 1) == '#' ?
substr($tag['name'], 1) : $tag['name']; substr($tag['name'], 1) : $tag['name'];
$banned = TrendingHashtagService::getBannedHashtagNames();
if(count($banned)) {
if(in_array(strtolower($name), array_map('strtolower', $banned))) {
continue;
}
}
$hashtag = Hashtag::firstOrCreate([ $hashtag = Hashtag::firstOrCreate([
'slug' => str_slug($name) 'slug' => str_slug($name)
], [ ], [

View file

@ -16,13 +16,20 @@ class TrendingHashtagService
return self::CACHE_KEY . $k; return self::CACHE_KEY . $k;
} }
public static function getBlockedHashtags() public static function getBannedHashtags()
{ {
return Cache::remember(self::key(':is_banned'), 1209600, function() { return Cache::remember(self::key(':is_banned'), 1209600, function() {
return Hashtag::whereIsBanned(true)->pluck('id')->toArray(); return Hashtag::whereIsBanned(true)->pluck('id')->toArray();
}); });
} }
/**
 * Get the names of all banned hashtags.
 *
 * The banned hashtag IDs are taken from getBannedHashtags(), the matching
 * Hashtag records are loaded, and their `name` values are returned as a
 * plain array. The result is cached under the ':is_banned:names' key with
 * a TTL of 1209600 (presumably seconds, i.e. 14 days - confirm against the
 * cache driver/framework version in use).
 *
 * @return array
 */
public static function getBannedHashtagNames()
{
    $cacheKey = self::key(':is_banned:names');

    // Only invoked by the cache layer when the key is missing or expired.
    $loadNames = function() {
        $bannedIds = self::getBannedHashtags();
        $hashtags = Hashtag::find($bannedIds);

        return $hashtags->pluck('name')->toArray();
    };

    return Cache::remember($cacheKey, 1209600, $loadNames);
}
public static function getNonTrendingHashtags() public static function getNonTrendingHashtags()
{ {
return Cache::remember(self::key(':can_trend'), 1209600, function() { return Cache::remember(self::key(':can_trend'), 1209600, function() {
@ -52,7 +59,7 @@ class TrendingHashtagService
{ {
$minId = self::getMinRecentId(); $minId = self::getMinRecentId();
$skipIds = array_merge(self::getBlockedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags()); $skipIds = array_merge(self::getBannedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags());
return Cache::remember(self::CACHE_KEY, config('trending.hashtags.ttl'), function() use($minId, $skipIds) { return Cache::remember(self::CACHE_KEY, config('trending.hashtags.ttl'), function() use($minId, $skipIds) {
return StatusHashtag::select('hashtag_id', \DB::raw('count(*) as total')) return StatusHashtag::select('hashtag_id', \DB::raw('count(*) as total'))

View file

@ -12,6 +12,7 @@ namespace App\Util\Lexer;
use Illuminate\Support\Str; use Illuminate\Support\Str;
use App\Status; use App\Status;
use App\Services\AutolinkService; use App\Services\AutolinkService;
use App\Services\TrendingHashtagService;
/** /**
* Twitter Extractor Class. * Twitter Extractor Class.
@ -267,6 +268,8 @@ class Extractor extends Regex
return []; return [];
} }
$bannedTags = TrendingHashtagService::getBannedHashtagNames();
preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
$tags = []; $tags = [];
@ -278,7 +281,12 @@ class Extractor extends Regex
if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) { if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
continue; continue;
} }
if(mb_strlen($hashtag[0]) > 124) { if (count($bannedTags)) {
if(in_array(strtolower($hashtag[0]), array_map('strtolower', $bannedTags))) {
continue;
}
}
if (mb_strlen($hashtag[0]) > 124) {
continue; continue;
} }
$tags[] = [ $tags[] = [