mirror of
https://github.com/pixelfed/pixelfed.git
synced 2024-11-24 15:31:26 +00:00
Add Autospam Advanced Detection
This commit is contained in:
parent
75db5116b7
commit
132a58de54
15 changed files with 819 additions and 31 deletions
255
app/Http/Controllers/Admin/AdminAutospamController.php
Normal file
255
app/Http/Controllers/Admin/AdminAutospamController.php
Normal file
|
@ -0,0 +1,255 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Http\Controllers\Admin;
|
||||||
|
|
||||||
|
use DB, Cache;
|
||||||
|
use App\{
|
||||||
|
AccountInterstitial,
|
||||||
|
DiscoverCategory,
|
||||||
|
DiscoverCategoryHashtag,
|
||||||
|
Hashtag,
|
||||||
|
Media,
|
||||||
|
Profile,
|
||||||
|
Status,
|
||||||
|
StatusHashtag,
|
||||||
|
User
|
||||||
|
};
|
||||||
|
use App\Models\ConfigCache;
|
||||||
|
use App\Models\AutospamCustomTokens;
|
||||||
|
use App\Services\AccountService;
|
||||||
|
use App\Services\ConfigCacheService;
|
||||||
|
use App\Services\StatusService;
|
||||||
|
use Carbon\Carbon;
|
||||||
|
use Illuminate\Http\Request;
|
||||||
|
use Illuminate\Validation\Rule;
|
||||||
|
use League\ISO3166\ISO3166;
|
||||||
|
use Illuminate\Support\Str;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use Illuminate\Support\Facades\Validator;
|
||||||
|
use Illuminate\Support\Facades\Http;
|
||||||
|
use App\Http\Controllers\PixelfedDirectoryController;
|
||||||
|
use \DateInterval;
|
||||||
|
use \DatePeriod;
|
||||||
|
use App\Http\Resources\AdminSpamReport;
|
||||||
|
use App\Util\Lexer\Classifier;
|
||||||
|
use App\Jobs\AutospamPipeline\AutospamPretrainPipeline;
|
||||||
|
use App\Jobs\AutospamPipeline\AutospamPretrainNonSpamPipeline;
|
||||||
|
use App\Jobs\AutospamPipeline\AutospamUpdateCachedDataPipeline;
|
||||||
|
use Illuminate\Support\Facades\URL;
|
||||||
|
use App\Services\AutospamService;
|
||||||
|
|
||||||
|
trait AdminAutospamController
|
||||||
|
{
|
||||||
|
/**
 * Render the admin Autospam dashboard page.
 *
 * @param  Request  $request  Incoming request; not read by this action.
 * @return mixed  Whatever the view() helper returns for the dashboard template.
 */
public function autospamHome(Request $request)
{
    $template = 'admin.autospam.home';

    return view($template);
}
|
||||||
|
|
||||||
|
/**
 * Build the JSON payload for the Autospam dashboard.
 *
 * The payload contains the feature flags, the open/closed autospam report
 * counts, the presence and size of the three model files, and a per-month
 * count of AccountInterstitial rows created in the last 52 weeks (used for
 * the graph).
 *
 * @param  Request  $request  Incoming request; not read by this action.
 * @return array
 */
public function getAutospamConfigApi(Request $request)
{
    // Report counters are cached for 3600 seconds.
    $openCount = Cache::remember(
        'admin-dash:reports:spam-count',
        3600,
        fn() => AccountInterstitial::whereType('post.autospam')->whereNull('appeal_handled_at')->count()
    );

    $closedCount = Cache::remember(
        'admin-dash:reports:spam-count-closed',
        3600,
        fn() => AccountInterstitial::whereType('post.autospam')->whereNotNull('appeal_handled_at')->count()
    );

    // NOTE(review): this cache key ends with a trailing space. It is kept
    // as-is so existing cache entries and any invalidation elsewhere still
    // match — confirm whether the space is intentional.
    $monthly = Cache::remember('admin-dash:reports:spam-count-stats-this-week ', 86400, function() {
        // The month bucket expression (and the groupBy argument shape)
        // differs between PostgreSQL and the other supported driver.
        $isPgsql = config('database.default') == 'pgsql';
        $monthExpr = $isPgsql
            ? "to_char(created_at, 'MM-YYYY')"
            : "DATE_FORMAT(created_at, '%m-%Y')";
        $grouping = $isPgsql ? [DB::raw($monthExpr)] : DB::raw($monthExpr);

        $rows = AccountInterstitial::select(
                DB::raw('count(id) as count'),
                DB::raw($monthExpr . " as month_year")
            )
            ->where('created_at', '>=', now()->subWeeks(52))
            ->groupBy($grouping)
            ->get();

        return $rows
            ->map(function($row) {
                // month_year is "MM-YYYY"; prefix a day so it parses as a full date.
                $month = now()->parse('01-' . $row->month_year);

                return [
                    'id' => $month->format('Ym'),
                    'x' => $month->format('M Y'),
                    'y' => $row->count
                ];
            })
            ->sortBy('id')
            ->values()
            ->toArray();
    });

    // Presence and byte size of each model file; size stays 0 when missing.
    $modelPaths = [
        'spam' => AutospamService::MODEL_SPAM_PATH,
        'ham' => AutospamService::MODEL_HAM_PATH,
        'combined' => AutospamService::MODEL_FILE_PATH,
    ];

    $files = [];
    foreach ($modelPaths as $label => $path) {
        $present = Storage::exists($path);
        $files[$label] = [
            'exists' => $present,
            'size' => $present ? Storage::size($path) : 0,
        ];
    }

    $series = collect($monthly);

    return [
        'autospam_enabled' => (bool) config_cache('pixelfed.bouncer.enabled'),
        'nlp_enabled' => (bool) AutospamService::active(),
        'files' => $files,
        'open' => $openCount,
        'closed' => $closedCount,
        'graph' => $series->pluck('y')->values(),
        'graphLabels' => $series->pluck('x')->values()
    ];
}
|
||||||
|
|
||||||
|
/**
 * List handled autospam reports that were confirmed as spam, newest first.
 *
 * Results are cursor-paginated six per page and wrapped in the
 * AdminSpamReport resource; the current query string is kept on the
 * pagination links.
 *
 * @param  Request  $request  Incoming request; not read directly here.
 */
public function getAutospamReportsClosedApi(Request $request)
{
    $handledSpam = AccountInterstitial::whereType('post.autospam')
        ->whereIsSpam(true)
        ->whereNotNull('appeal_handled_at')
        ->orderBy('id', 'desc')
        ->cursorPaginate(6)
        ->withQueryString();

    return AdminSpamReport::collection($handledSpam);
}
|
||||||
|
|
||||||
|
/**
 * Queue a spam pre-training run from the statuses already flagged as spam.
 *
 * Aborts with 422 when fewer than 100 spam-flagged statuses exist, or when
 * a run was already requested within the cooldown window.
 *
 * @param  Request  $request  Incoming request; not read by this action.
 * @return array{msg: string}
 */
public function postAutospamTrainSpamApi(Request $request)
{
    $aiCount = AccountInterstitial::whereItemType('App\Status')
        ->whereIsSpam(true)
        ->count();
    abort_if($aiCount < 100, 422, 'You don\'t have enough data to pre-train against.');

    $existing = Cache::get('pf:admin:autospam:pretrain:recent');
    abort_if($existing, 422, 'You\'ve already run this recently, please wait 30 minutes before pre-training again');

    AutospamPretrainPipeline::dispatch();

    // The cooldown must match the 30 minutes promised in the message above.
    // The previous integer TTL of 1440 is interpreted as seconds (24 minutes)
    // by the cache store, so an explicit expiry time is used instead.
    Cache::put('pf:admin:autospam:pretrain:recent', 1, now()->addMinutes(30));

    return [
        'msg' => 'Success!'
    ];
}
|
||||||
|
|
||||||
|
/**
 * Search profiles by username for the non-spam (ham) training picker.
 *
 * Only profiles whose status and domain are both NULL are considered.
 * Returns up to ten matches, ordered by follower count descending.
 *
 * @param  Request  $request  Must carry a non-empty string "q".
 * @return \Illuminate\Support\Collection  Items of [id, avatar, username].
 */
public function postAutospamTrainNonSpamSearchApi(Request $request)
{
    $this->validate($request, [
        'q' => 'required|string|min:1'
    ]);

    // Substring match anywhere in the username.
    $pattern = '%' . $request->input('q') . '%';

    $matches = Profile::whereNull(['status', 'domain'])
        ->where('username', 'like', $pattern)
        ->orderByDesc('followers_count')
        ->take(10)
        ->get();

    return $matches
        ->map(function($profile) {
            // The avatar comes from the AccountService representation of the profile.
            $account = AccountService::get($profile->id, true);

            return [
                'id' => (string) $profile->id,
                'avatar' => $account['avatar'],
                'username' => $profile->username
            ];
        })
        ->values();
}
|
||||||
|
|
||||||
|
/**
 * Queue non-spam (ham) training from the captions of the selected accounts.
 *
 * @param  Request  $request  Must carry "accounts": 1-10 entries, each with an "id".
 * @return mixed  The matched Profile collection that was handed to the job.
 */
public function postAutospamTrainNonSpamSubmitApi(Request $request)
{
    $this->validate($request, [
        'accounts' => 'required|array|min:1|max:10',
        // Every entry must carry an id; without this rule a malformed entry
        // caused an undefined-index error instead of a 422 response.
        'accounts.*.id' => 'required'
    ]);

    $ids = collect($request->input('accounts'))->pluck('id');

    // Only profiles whose domain and status are both NULL are eligible.
    $accounts = Profile::whereNull(['domain', 'status'])->find($ids);

    // NOTE(review): this only aborts when *none* of the ids resolve, although
    // the message reads "one or more" — confirm the intended strictness.
    abort_if(!$accounts || !$accounts->count(), 422, 'One or more of the selected accounts are not valid');

    AutospamPretrainNonSpamPipeline::dispatch($accounts);

    return $accounts;
}
|
||||||
|
|
||||||
|
/**
 * List custom autospam tokens, newest first, cursor-paginated six per page.
 *
 * @param  Request  $request  Incoming request; not read directly here.
 */
public function getAutospamCustomTokensApi(Request $request)
{
    $perPage = 6;

    return AutospamCustomTokens::latest()->cursorPaginate($perPage);
}
|
||||||
|
|
||||||
|
/**
 * Create a custom autospam token and queue a rebuild of the cached model data.
 *
 * Only "token" is validated (required and unique); weight, note and active
 * are stored exactly as submitted. Any category other than "spam" is stored
 * as "ham".
 *
 * @param  Request  $request
 * @return AutospamCustomTokens  The saved token.
 */
public function saveNewAutospamCustomTokensApi(Request $request)
{
    $this->validate($request, [
        'token' => 'required|unique:autospam_custom_tokens,token',
    ]);

    $isSpam = $request->input('category') === 'spam';

    $customToken = new AutospamCustomTokens;
    $customToken->token = $request->input('token');
    $customToken->weight = $request->input('weight');
    $customToken->category = $isSpam ? 'spam' : 'ham';
    $customToken->note = $request->input('note');
    $customToken->active = $request->input('active');
    $customToken->save();

    // Fold the new token into the stored model files.
    AutospamUpdateCachedDataPipeline::dispatch();

    return $customToken;
}
|
||||||
|
|
||||||
|
/**
 * Update an existing custom autospam token and queue a rebuild of the
 * cached model data.
 *
 * Responds with 404 (via findOrFail) when the id does not exist.
 *
 * @param  Request  $request  Must carry id, token, category (spam|ham) and active.
 * @return AutospamCustomTokens  The updated token.
 */
public function updateAutospamCustomTokensApi(Request $request)
{
    $rules = [
        'id' => 'required',
        'token' => 'required',
        'category' => 'required|in:spam,ham',
        'active' => 'required|boolean'
    ];
    $this->validate($request, $rules);

    $customToken = AutospamCustomTokens::findOrFail($request->input('id'));

    // NOTE(review): "token" is required above but never written back;
    // presumably the token text is meant to be immutable — confirm.
    $customToken->weight = $request->input('weight');
    $customToken->category = $request->input('category');
    $customToken->note = $request->input('note');
    $customToken->active = $request->input('active');
    $customToken->save();

    AutospamUpdateCachedDataPipeline::dispatch();

    return $customToken;
}
|
||||||
|
|
||||||
|
/**
 * Download the stored spam model file.
 *
 * Aborts with 422 when the spam model file does not exist yet.
 *
 * @param  Request  $request  Incoming request; not read by this action.
 */
public function exportAutospamCustomTokensApi(Request $request)
{
    $path = AutospamService::MODEL_SPAM_PATH;

    abort_unless(
        Storage::exists($path),
        422,
        'Autospam Dataset does not exist, please train spam before attempting to export'
    );

    return Storage::download($path);
}
|
||||||
|
|
||||||
|
/**
 * Turn the NLP autospam setting on and drop the cached eligibility check.
 *
 * @param  Request  $request  Incoming request; not read by this action.
 * @return array{msg: string}
 */
public function enableAutospamApi(Request $request)
{
    $enabled = true;

    ConfigCacheService::put('autospam.nlp.enabled', $enabled);
    // Force AutospamService to re-evaluate eligibility on next use.
    Cache::forget(AutospamService::CHCKD_CACHE_KEY);

    return [
        'msg' => 'Success',
    ];
}
|
||||||
|
|
||||||
|
/**
 * Turn the NLP autospam setting off and drop the cached eligibility check.
 *
 * @param  Request  $request  Incoming request; not read by this action.
 * @return array{msg: string}
 */
public function disableAutospamApi(Request $request)
{
    $enabled = false;

    ConfigCacheService::put('autospam.nlp.enabled', $enabled);
    // Force AutospamService to re-evaluate eligibility on next use.
    Cache::forget(AutospamService::CHCKD_CACHE_KEY);

    return [
        'msg' => 'Success',
    ];
}
|
||||||
|
}
|
|
@ -21,6 +21,7 @@ use Carbon\Carbon;
|
||||||
use Illuminate\Http\Request;
|
use Illuminate\Http\Request;
|
||||||
use Illuminate\Support\Facades\Redis;
|
use Illuminate\Support\Facades\Redis;
|
||||||
use App\Http\Controllers\Admin\{
|
use App\Http\Controllers\Admin\{
|
||||||
|
AdminAutospamController,
|
||||||
AdminDirectoryController,
|
AdminDirectoryController,
|
||||||
AdminDiscoverController,
|
AdminDiscoverController,
|
||||||
AdminHashtagsController,
|
AdminHashtagsController,
|
||||||
|
@ -43,6 +44,7 @@ use App\Models\CustomEmoji;
|
||||||
class AdminController extends Controller
|
class AdminController extends Controller
|
||||||
{
|
{
|
||||||
use AdminReportController,
|
use AdminReportController,
|
||||||
|
AdminAutospamController,
|
||||||
AdminDirectoryController,
|
AdminDirectoryController,
|
||||||
AdminDiscoverController,
|
AdminDiscoverController,
|
||||||
AdminHashtagsController,
|
AdminHashtagsController,
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Jobs\AutospamPipeline;
|
||||||
|
|
||||||
|
use Illuminate\Bus\Queueable;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldBeUnique;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||||
|
use Illuminate\Foundation\Bus\Dispatchable;
|
||||||
|
use Illuminate\Queue\InteractsWithQueue;
|
||||||
|
use Illuminate\Queue\SerializesModels;
|
||||||
|
use App\Util\Lexer\Classifier;
|
||||||
|
use App\AccountInterstitial;
|
||||||
|
use App\Profile;
|
||||||
|
use App\Status;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use App\Services\AutospamService;
|
||||||
|
|
||||||
|
/**
 * Queued job that trains the "ham" (non-spam) model from the public,
 * captioned statuses of a set of accounts and stores the result at
 * AutospamService::MODEL_HAM_PATH.
 */
class AutospamPretrainNonSpamPipeline implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Classifier that accumulates the ham word counts for this run. */
    public $classifier;

    /** Accounts whose captions are learned as ham; each item must expose ->id. */
    public $accounts;

    /**
     * Create a new job instance.
     *
     * @param  iterable  $accounts  Accounts to sample captions from.
     */
    public function __construct($accounts)
    {
        $this->accounts = $accounts;
        $this->classifier = new Classifier();
    }

    /**
     * Execute the job.
     *
     * Learns up to 400 randomly chosen public captions per account, writes
     * the exported model to storage, then queues the cached-data rebuild.
     */
    public function handle(): void
    {
        $model = $this->classifier;

        foreach ($this->accounts as $account) {
            $captions = Status::whereNotNull('caption')
                ->whereScope('public')
                ->whereProfileId($account->id)
                ->inRandomOrder()
                ->take(400)
                ->pluck('caption');

            foreach ($captions as $caption) {
                $model->learn($caption, 'ham');
            }
        }

        Storage::put(AutospamService::MODEL_HAM_PATH, $model->export());

        // Rebuild the combined model shortly after the ham file is written.
        AutospamUpdateCachedDataPipeline::dispatch()->delay(5);
    }
}
|
63
app/Jobs/AutospamPipeline/AutospamPretrainPipeline.php
Normal file
63
app/Jobs/AutospamPipeline/AutospamPretrainPipeline.php
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Jobs\AutospamPipeline;
|
||||||
|
|
||||||
|
use Illuminate\Bus\Queueable;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldBeUnique;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||||
|
use Illuminate\Foundation\Bus\Dispatchable;
|
||||||
|
use Illuminate\Queue\InteractsWithQueue;
|
||||||
|
use Illuminate\Queue\SerializesModels;
|
||||||
|
use App\Util\Lexer\Classifier;
|
||||||
|
use App\AccountInterstitial;
|
||||||
|
use App\Status;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use App\Services\AutospamService;
|
||||||
|
|
||||||
|
/**
 * Queued job that trains the "spam" model from statuses already flagged as
 * spam and stores the result at AutospamService::MODEL_SPAM_PATH.
 */
class AutospamPretrainPipeline implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Classifier that accumulates the spam word counts for this run. */
    public $classifier;

    /**
     * Create a new job instance.
     */
    public function __construct()
    {
        $this->classifier = new Classifier();
    }

    /**
     * Execute the job.
     *
     * Does nothing when fewer than 100 spam-flagged statuses exist.
     * Otherwise learns the captions of a random sample (bounded by the
     * autospam.nlp.spam_sample_limit config value), writes the exported
     * model to storage, then queues the cached-data rebuild.
     */
    public function handle(): void
    {
        $model = $this->classifier;

        // Base query for interstitials that point at a status marked as spam.
        $spamReports = fn() => AccountInterstitial::whereItemType('App\Status')->whereIsSpam(true);

        if ($spamReports()->count() < 100) {
            return;
        }

        $statusIds = $spamReports()
            ->inRandomOrder()
            ->take(config('autospam.nlp.spam_sample_limit'))
            ->pluck('item_id');

        foreach ($statusIds as $statusId) {
            // Skip statuses that no longer exist or have no caption.
            $status = Status::whereNotNull('caption')->find($statusId);
            if ($status) {
                $model->learn($status->caption, 'spam');
            }
        }

        Storage::put(AutospamService::MODEL_SPAM_PATH, $model->export());

        // Rebuild the combined model shortly after the spam file is written.
        AutospamUpdateCachedDataPipeline::dispatch()->delay(5);
    }
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Jobs\AutospamPipeline;
|
||||||
|
|
||||||
|
use Illuminate\Bus\Queueable;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldBeUnique;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||||
|
use Illuminate\Foundation\Bus\Dispatchable;
|
||||||
|
use Illuminate\Queue\InteractsWithQueue;
|
||||||
|
use Illuminate\Queue\SerializesModels;
|
||||||
|
use App\Models\AutospamCustomTokens;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use App\Services\AutospamService;
|
||||||
|
use Cache;
|
||||||
|
|
||||||
|
/**
 * Queued job that folds the admin-defined custom tokens into the stored spam
 * and ham model files, rebuilds the combined model file and clears the
 * AutospamService caches.
 */
class AutospamUpdateCachedDataPipeline implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Create a new job instance.
     */
    public function __construct()
    {
    }

    /**
     * Execute the job.
     */
    public function handle(): void
    {
        $spamWords = $this->mergeCustomTokens(AutospamService::MODEL_SPAM_PATH, 'spam');
        $hamWords = $this->mergeCustomTokens(AutospamService::MODEL_HAM_PATH, 'ham');

        $combined = [
            'documents' => [
                'spam' => count($spamWords),
                'ham' => count($hamWords),
            ],
            'words' => [
                'spam' => $spamWords,
                'ham' => $hamWords
            ]
        ];

        // Flags must be OR-ed together: passed as a separate argument,
        // JSON_UNESCAPED_SLASHES was being used as the depth, not as a flag.
        Storage::put(
            AutospamService::MODEL_FILE_PATH,
            json_encode($combined, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)
        );

        Cache::forget(AutospamService::MODEL_CACHE_KEY);
        Cache::forget(AutospamService::CHCKD_CACHE_KEY);
    }

    /**
     * Merge the custom tokens of one category into the model file at $path.
     *
     * Each custom token's weight is added to the existing count for that
     * word (or becomes the count when the word is new). The category's
     * document count is set to the number of distinct words, and the words
     * are sorted by count descending. The file is written back unless it did
     * not exist and there is nothing to store.
     *
     * NOTE(review): weights are added to the stored file on every run, so
     * repeated runs keep increasing the same words; tokens are also merged
     * regardless of their "active" flag — confirm both are intended.
     *
     * @param  string  $path      Storage path of the per-category model file.
     * @param  string  $category  "spam" or "ham".
     * @return array<string, int|float>  The merged word => count map.
     */
    private function mergeCustomTokens(string $path, string $category): array
    {
        // A missing, empty or undecodable file is treated as an empty model
        // instead of failing on count(null) further down.
        $existed = Storage::exists($path);
        $model = $existed ? json_decode((string) Storage::get($path), true) : null;
        if (!is_array($model)) {
            $model = [];
        }
        if (!is_array($model['words'] ?? null)) {
            $model['words'] = [];
        }
        if (!is_array($model['documents'] ?? null)) {
            $model['documents'] = [];
        }

        $words = $model['words'][$category] ?? [];
        if (!is_array($words)) {
            $words = [];
        }

        foreach (AutospamCustomTokens::whereCategory($category)->get() as $custom) {
            // Multibyte lower-casing keeps keys consistent with the
            // classifier's tokenizer, which lower-cases with Str::lower.
            $key = mb_strtolower((string) $custom->token);
            // Look the existing count up in this category's own word list
            // (the ham branch previously consulted the spam model).
            $words[$key] = ($words[$key] ?? 0) + $custom->weight;
        }

        arsort($words);

        if ($existed || $words !== []) {
            $model['words'][$category] = $words;
            $model['documents'][$category] = count($words);
            Storage::put($path, json_encode($model, JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT));
        }

        return $words;
    }
}
|
78
app/Services/AutospamService.php
Normal file
78
app/Services/AutospamService.php
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Services;
|
||||||
|
|
||||||
|
use Illuminate\Support\Facades\Cache;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use App\Util\Lexer\Classifier;
|
||||||
|
|
||||||
|
/**
 * Entry point for NLP-based autospam checks backed by the stored
 * spam/ham training data.
 */
class AutospamService
{
    const CHCKD_CACHE_KEY = 'pf:services:autospam:nlp:checked';
    const MODEL_CACHE_KEY = 'pf:services:autospam:nlp:model-cache';
    const MODEL_FILE_PATH = 'nlp/active-training-data.json';
    const MODEL_SPAM_PATH = 'nlp/spam.json';
    const MODEL_HAM_PATH = 'nlp/ham.json';

    /**
     * Classify a piece of text.
     *
     * @param  string|null  $text
     * @return bool|null  false for empty text; null when the feature is
     *                    inactive or no usable model is available; otherwise
     *                    true when the classifier's best guess is "spam".
     */
    public static function check($text)
    {
        // Empty text is never spam. The original statement here was a bare
        // `false;` without `return`, so empty input fell through.
        if (!$text) {
            return false;
        }

        if (!self::active()) {
            return null;
        }

        $model = self::getCachedModel();

        // A missing or malformed model cannot be imported into the
        // classifier; report "not checked" instead of raising an error.
        if (
            !is_array($model)
            || !is_array($model['documents'] ?? null)
            || !is_array($model['words'] ?? null)
        ) {
            return null;
        }

        $classifier = new Classifier;
        $classifier->import($model['documents'], $model['words']);

        return $classifier->most($text) === 'spam';
    }

    /**
     * Whether the instance is able to run NLP checks.
     *
     * Requires the bouncer and autospam config flags, the spam, ham and
     * combined model files, and a combined file of at least 1000 bytes.
     * The result is cached for 86400 seconds under CHCKD_CACHE_KEY.
     *
     * @return bool
     */
    public static function eligible()
    {
        return Cache::remember(self::CHCKD_CACHE_KEY, 86400, function() {
            if (!config_cache('pixelfed.bouncer.enabled') || !config('autospam.enabled')) {
                return false;
            }

            if (!Storage::exists(self::MODEL_SPAM_PATH)) {
                return false;
            }

            if (!Storage::exists(self::MODEL_HAM_PATH)) {
                return false;
            }

            if (!Storage::exists(self::MODEL_FILE_PATH)) {
                return false;
            }

            // A combined model smaller than this is treated as not trained.
            if (Storage::size(self::MODEL_FILE_PATH) < 1000) {
                return false;
            }

            return true;
        });
    }

    /**
     * Whether NLP checks are both enabled by the admin and eligible to run.
     *
     * @return bool
     */
    public static function active()
    {
        return config_cache('autospam.nlp.enabled') && self::eligible();
    }

    /**
     * Load the combined model, cached for 86400 seconds under MODEL_CACHE_KEY.
     *
     * @return array|null  Decoded model, or null when the feature is inactive
     *                     or the file is empty.
     */
    public static function getCachedModel()
    {
        if (!self::active()) {
            return null;
        }

        return Cache::remember(self::MODEL_CACHE_KEY, 86400, function() {
            $contents = Storage::get(self::MODEL_FILE_PATH);

            if (!$contents) {
                return null;
            }

            return json_decode($contents, true);
        });
    }
}
|
|
@ -69,7 +69,9 @@ class ConfigCacheService
|
||||||
'instance.landing.show_directory',
|
'instance.landing.show_directory',
|
||||||
'instance.landing.show_explore',
|
'instance.landing.show_explore',
|
||||||
'instance.admin.pid',
|
'instance.admin.pid',
|
||||||
'instance.banner.blurhash'
|
'instance.banner.blurhash',
|
||||||
|
|
||||||
|
'autospam.nlp.enabled',
|
||||||
// 'system.user_mode'
|
// 'system.user_mode'
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
178
app/Util/Lexer/Classifier.php
Normal file
178
app/Util/Lexer/Classifier.php
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Util\Lexer;
|
||||||
|
|
||||||
|
use Brick\Math\BigDecimal;
|
||||||
|
use Illuminate\Support\Collection;
|
||||||
|
use Illuminate\Support\Str;
|
||||||
|
|
||||||
|
/**
 * Small naive-Bayes style text classifier.
 *
 * Statements are tokenized into words; per-type word counts and per-type
 * document counts are kept in memory and can be exported to / imported
 * from a JSON-friendly structure.
 */
class Classifier
{
    /**
     * Optional custom tokenizer; when unset the default regex tokenizer is used.
     *
     * @var ?callable(string): array<int, string>
     */
    private $tokenizer;

    /**
     * Word counts, keyed by type and then by word.
     *
     * @var array<string, array<string, int>>
     */
    private array $words = [];

    /**
     * Number of learned documents per type.
     *
     * @var array<string, int>
     */
    private array $documents = [];

    /** When true, per-type document counts are used as a prior. */
    private bool $uneven = false;

    /**
     * @param callable(string): array<int, string> $tokenizer
     */
    public function setTokenizer(callable $tokenizer): void
    {
        $this->tokenizer = $tokenizer;
    }

    /**
     * Split a statement into tokens.
     *
     * Uses the custom tokenizer when one is set; otherwise lower-cases the
     * statement and extracts runs of alphabetic characters.
     *
     * @return Collection<int, string>
     */
    public function tokenize(string $string): Collection
    {
        if ($this->tokenizer) {
            /** @var array<int, string> */
            $tokens = call_user_func($this->tokenizer, $string);

            return collect($tokens);
        }

        return Str::of($string)
            ->lower()
            ->matchAll('/[[:alpha:]]+/u');
    }

    /**
     * Learn a statement as an example of the given type.
     *
     * @return $this
     */
    public function learn(string $statement, string $type): self
    {
        foreach ($this->tokenize($statement) as $word) {
            $this->incrementWord($type, $word);
        }

        $this->incrementType($type);

        return $this;
    }

    /**
     * Compute the likelihood of the statement for every known type.
     *
     * Values are plain products of per-word probabilities rendered as
     * decimal strings, sorted descending. For long statements the product
     * can underflow to zero; use most() when only the winner is needed.
     *
     * @return Collection<string, string>
     */
    public function guess(string $statement): Collection
    {
        $words = $this->tokenize($statement);

        return collect($this->documents)
            ->map(function ($count, string $type) use ($words) {
                $likelihood = $this->pTotal($type);
                // Sum the vocabulary once per type, not once per word.
                $total = $this->wordTotal($type);

                foreach ($words as $word) {
                    $likelihood *= $this->p($word, $type, $total);
                }

                return (string) BigDecimal::of($likelihood);
            })
            ->sortDesc();
    }

    /**
     * Return the most likely type for the statement.
     *
     * Scores are accumulated as sums of logarithms rather than a raw
     * product: with many words the product underflows to 0.0 for every
     * type, which previously made all types tie and the first one win.
     * Ties still resolve to the type listed first.
     *
     * @throws \RuntimeException when nothing has been learned or imported.
     */
    public function most(string $statement): string
    {
        $words = $this->tokenize($statement);
        $bestType = null;
        $bestScore = null;

        foreach ($this->documents as $type => $count) {
            $type = (string) $type;
            $total = $this->wordTotal($type);

            $prior = $this->pTotal($type);
            $score = $prior > 0 ? log($prior) : -INF;

            foreach ($words as $word) {
                $probability = $this->p($word, $type, $total);
                // A non-positive probability has no logarithm; rank it last.
                $score += $probability > 0 ? log($probability) : -INF;
            }

            if ($bestType === null || $score > $bestScore) {
                $bestType = $type;
                $bestScore = $score;
            }
        }

        if ($bestType === null) {
            throw new \RuntimeException('Classifier has no training data to classify against.');
        }

        return $bestType;
    }

    /**
     * Enable or disable weighting by per-type document counts.
     *
     * @return self
     */
    public function uneven(bool $enabled = false): self
    {
        $this->uneven = $enabled;

        return $this;
    }

    /**
     * Increment the document count for the type
     */
    private function incrementType(string $type): void
    {
        $this->documents[$type] = ($this->documents[$type] ?? 0) + 1;
    }

    /**
     * Increment the word count for the given type
     *
     * Words on the ignore list are skipped for the "spam" type only.
     */
    private function incrementWord(string $type, string $word): void
    {
        if ($type === 'spam' && in_array($word, $this->ignoredTokens(), true)) {
            return;
        }

        $this->words[$type][$word] = ($this->words[$type][$word] ?? 0) + 1;
    }

    /**
     * Words that are never counted towards the spam type.
     *
     * Read from the comma-separated autospam.ignored_tokens config value,
     * falling back to a small built-in list. Entries are trimmed so that
     * "the, a" behaves the same as "the,a".
     *
     * @return array<int, string>
     */
    private function ignoredTokens(): array
    {
        $configured = config('autospam.ignored_tokens');

        if (!$configured) {
            return ['the', 'a', 'of', 'and'];
        }

        return array_map('trim', explode(',', $configured));
    }

    /**
     * Sum of all word counts recorded for a type (0 when the type has none).
     *
     * @return float|int
     */
    private function wordTotal(string $type)
    {
        return array_sum($this->words[$type] ?? []);
    }

    /**
     * Add-one smoothed probability of a word within a type.
     *
     * @param  float|int|null  $total  Precomputed wordTotal($type), if available.
     * @return float|int
     */
    private function p(string $word, string $type, $total = null)
    {
        $count = $this->words[$type][$word] ?? 0;
        $total ??= $this->wordTotal($type);

        return ($count + 1) / ($total + 1);
    }

    /**
     * Prior for a type: its smoothed share of documents when "uneven" is
     * enabled, otherwise 1.
     *
     * @return float|int
     */
    private function pTotal(string $type)
    {
        return $this->uneven
            ? ($this->documents[$type] + 1) / (array_sum($this->documents) + 1)
            : 1;
    }

    /**
     * Export the model as pretty-printed JSON, with each type's words
     * sorted by count descending.
     *
     * @return string|false
     */
    public function export()
    {
        $words = collect($this->words)
            ->map(function($counts) {
                arsort($counts);
                return $counts;
            })
            ->all();

        return json_encode([
            '_ns' => 'https://pixelfed.org/ns/nlp',
            '_v' => '1.0',
            'documents' => $this->documents,
            'words' => $words
        ], JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES);
    }

    /**
     * Replace the in-memory model with previously exported data.
     *
     * Both arguments must be arrays; they are assigned to array-typed
     * properties.
     *
     * @param  array<string, int>  $documents
     * @param  array<string, array<string, int>>  $words
     */
    public function import($documents, $words)
    {
        $this->documents = $documents;
        $this->words = $words;
    }
}
|
BIN
public/js/admin.js
vendored
BIN
public/js/admin.js
vendored
Binary file not shown.
Binary file not shown.
12
resources/views/admin/autospam/home.blade.php
Normal file
12
resources/views/admin/autospam/home.blade.php
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
@extends('admin.partial.template-full')
|
||||||
|
|
||||||
|
@section('section')
|
||||||
|
</div>
|
||||||
|
<admin-autospam />
|
||||||
|
@endsection
|
||||||
|
|
||||||
|
@push('scripts')
|
||||||
|
<script type="text/javascript">
|
||||||
|
new Vue({ el: '#panel'});
|
||||||
|
</script>
|
||||||
|
@endpush
|
|
@ -1,31 +1,76 @@
|
||||||
@section('menu')
|
<div class="col-12 col-md-3">
|
||||||
<ul class="nav flex-column settings-nav">
|
<ul class="nav flex-column settings-nav">
|
||||||
<li class="nav-item pl-3">
|
<li class="nav-item pl-3 {{request()->is('settings/home')?'active':''}}">
|
||||||
<a class="nav-link text-muted {{request()->is('*settings') ? 'font-weight-bold':''}}" href="{{route('admin.settings')}}">Home</a>
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings')}}">Account</a>
|
||||||
</li>
|
</li>
|
||||||
{{-- <li class="nav-item pl-3">
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/accessibility')?'active':''}}">
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/backups') ? 'font-weight-bold':''}}" href="{{route('admin.settings.backups')}}">Backups</a>
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.accessibility')}}">Accessibility</a>
|
||||||
</li> --}}
|
|
||||||
{{-- <li class="nav-item pl-3">
|
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/config') ? 'font-weight-bold':''}}" href="{{route('admin.settings.config')}}">Configuration</a>
|
|
||||||
</li> --}}
|
|
||||||
{{-- <li class="nav-item pl-3">
|
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/customize') ? 'font-weight-bold':''}}" href="{{route('admin.settings.customize')}}">Customize</a>
|
|
||||||
</li> --}}
|
|
||||||
{{-- <li class="nav-item pl-3">
|
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/features') ? 'font-weight-bold':''}}" href="{{route('admin.settings.features')}}">Features</a>
|
|
||||||
</li> --}}
|
|
||||||
{{-- <li class="nav-item pl-3">
|
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/maintenance') ? 'font-weight-bold':''}}" href="{{route('admin.settings.maintenance')}}">Maintenance</a>
|
|
||||||
</li> --}}
|
|
||||||
<li class="nav-item pl-3">
|
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/page*') ? 'font-weight-bold':''}}" href="{{route('admin.settings.pages')}}">Pages</a>
|
|
||||||
</li>
|
</li>
|
||||||
{{-- <li class="nav-item pl-3">
|
<li class="nav-item pl-3 {{request()->is('settings/email')?'active':''}}">
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/storage') ? 'font-weight-bold':''}}" href="{{route('admin.settings.storage')}}">Storage</a>
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.email')}}">Email</a>
|
||||||
</li> --}}
|
</li> --}}
|
||||||
<li class="nav-item pl-3">
|
@if(config('pixelfed.user_invites.enabled'))
|
||||||
<a class="nav-link text-muted {{request()->is('*settings/system') ? 'font-weight-bold':''}}" href="{{route('admin.settings.system')}}">System</a>
|
<li class="nav-item pl-3 {{request()->is('settings/invites*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.invites')}}">Invites</a>
|
||||||
|
</li>
|
||||||
|
@endif
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/notifications')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.notifications')}}">Notifications</a>
|
||||||
|
</li>
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/password')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.password')}}">Password</a>
|
||||||
|
</li> --}}
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/privacy*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.privacy')}}">Privacy</a>
|
||||||
|
</li>
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/relationships*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.relationships')}}">Relationships</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/reports*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.reports')}}">Reports</a>
|
||||||
|
</li> --}}
|
||||||
|
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/safety*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.security')}}">Safety</a>
|
||||||
|
</li> --}}
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/security*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.security')}}">Security</a>
|
||||||
|
</li>
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/sponsor*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.sponsor')}}">Sponsor</a>
|
||||||
|
</li> --}}
|
||||||
|
<li class="nav-item">
|
||||||
|
<hr>
|
||||||
|
</li>
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('*import*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.import')}}">Import</a>
|
||||||
|
</li> --}}
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/import*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.import')}}">Import</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/data-export')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.dataexport')}}">Data Export</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
@if(config('pixelfed.oauth_enabled') == true)
|
||||||
|
{{-- <li class="nav-item pl-3 {{request()->is('settings/applications')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.applications')}}">Applications</a>
|
||||||
|
</li> --}}
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/developers')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.developers')}}">Developers</a>
|
||||||
|
</li>
|
||||||
|
@endif
|
||||||
|
|
||||||
|
|
||||||
|
<li class="nav-item pl-3 {{request()->is('settings/labs*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.labs')}}">Labs</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
{{-- Sidebar entry labelled "Archived Posts"; marked active when the request path matches settings/arch*. --}}
{{-- NOTE(review): href uses route('settings.labs'), the same target as the "Labs" entry above — looks like a copy/paste placeholder; confirm the intended route. --}}
<li class="nav-item pl-3 {{request()->is('settings/arch*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.labs')}}">Archived Posts</a>
|
||||||
|
</li>
|
||||||
|
{{-- Sidebar entry labelled "Moderation Log"; marked active when the request path matches settings/moglod*. --}}
{{-- NOTE(review): 'moglod' looks like a transposition of 'modlog' — verify against the registered URI, otherwise this item may never highlight. --}}
{{-- NOTE(review): href also points at route('settings.labs'); confirm the intended route. --}}
<li class="nav-item pl-3 {{request()->is('settings/moglod*')?'active':''}}">
|
||||||
|
<a class="nav-link font-weight-light text-muted" href="{{route('settings.labs')}}">Moderation Log</a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
@endsection
|
</div>
|
||||||
|
|
|
@ -94,6 +94,8 @@ Route::domain(config('pixelfed.domain.admin'))->prefix('i/admin')->group(functio
|
||||||
|
|
||||||
Route::get('directory/home', 'AdminController@directoryHome')->name('admin.directory');
|
Route::get('directory/home', 'AdminController@directoryHome')->name('admin.directory');
|
||||||
|
|
||||||
|
// Admin autospam page (GET autospam/home, named 'admin.autospam'), dispatched to AdminController@autospamHome.
// NOTE(review): autospamHome is declared in the AdminAutospamController trait and returns the
// 'admin.autospam.home' view; presumably AdminController uses that trait — confirm.
Route::get('autospam/home', 'AdminController@autospamHome')->name('admin.autospam');
|
||||||
|
|
||||||
Route::prefix('api')->group(function() {
|
Route::prefix('api')->group(function() {
|
||||||
Route::get('stats', 'AdminController@getStats');
|
Route::get('stats', 'AdminController@getStats');
|
||||||
Route::get('accounts', 'AdminController@getAccounts');
|
Route::get('accounts', 'AdminController@getAccounts');
|
||||||
|
@ -129,6 +131,17 @@ Route::domain(config('pixelfed.domain.admin'))->prefix('i/admin')->group(functio
|
||||||
Route::get('reports/spam/all', 'AdminController@reportsApiSpamAll');
|
Route::get('reports/spam/all', 'AdminController@reportsApiSpamAll');
|
||||||
Route::get('reports/spam/get/{id}', 'AdminController@reportsApiSpamGet');
|
Route::get('reports/spam/get/{id}', 'AdminController@reportsApiSpamGet');
|
||||||
Route::post('reports/spam/handle', 'AdminController@reportsApiSpamHandle');
|
Route::post('reports/spam/handle', 'AdminController@reportsApiSpamHandle');
|
||||||
|
// Autospam admin API endpoints. Every route here is registered with Route::post — including the
// handlers whose method names start with "get" — and dispatches to an AdminController method.
// None of these routes is given a name.
// NOTE(review): these appear to sit inside the prefix('api') group of the admin domain routes — confirm.

// Configuration and closed-report listing.
Route::post('autospam/config', 'AdminController@getAutospamConfigApi');
|
||||||
|
Route::post('autospam/reports/closed', 'AdminController@getAutospamReportsClosedApi');
|
||||||
|
// Training: spam training, then non-spam search and non-spam submission.
Route::post('autospam/train', 'AdminController@postAutospamTrainSpamApi');
|
||||||
|
Route::post('autospam/search/non-spam', 'AdminController@postAutospamTrainNonSpamSearchApi');
|
||||||
|
Route::post('autospam/train/non-spam', 'AdminController@postAutospamTrainNonSpamSubmitApi');
|
||||||
|
// Custom tokens: list, store, update, export.
Route::post('autospam/tokens/custom', 'AdminController@getAutospamCustomTokensApi');
|
||||||
|
Route::post('autospam/tokens/store', 'AdminController@saveNewAutospamCustomTokensApi');
|
||||||
|
Route::post('autospam/tokens/update', 'AdminController@updateAutospamCustomTokensApi');
|
||||||
|
Route::post('autospam/tokens/export', 'AdminController@exportAutospamCustomTokensApi');
|
||||||
|
// Enable / disable toggles.
Route::post('autospam/config/enable', 'AdminController@enableAutospamApi');
|
||||||
|
Route::post('autospam/config/disable', 'AdminController@disableAutospamApi');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
1
storage/app/.gitignore
vendored
1
storage/app/.gitignore
vendored
|
@ -2,5 +2,6 @@
|
||||||
!backups/
|
!backups/
|
||||||
!public/
|
!public/
|
||||||
!remcache/
|
!remcache/
|
||||||
|
!nlp/
|
||||||
!cities.json
|
!cities.json
|
||||||
!.gitignore
|
!.gitignore
|
||||||
|
|
2
storage/app/nlp/.gitignore
vendored
Normal file
2
storage/app/nlp/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# Ignore everything in this directory...
*
|
||||||
|
# ...except this .gitignore itself, so the otherwise-empty directory stays tracked.
!.gitignore
|
Loading…
Reference in a new issue