Add S3 IG Import Media Storage

This commit is contained in:
Daniel Supernault 2024-02-02 02:29:33 -07:00
parent 5b7111c56f
commit 622e9cee97
No known key found for this signature in database
GPG key ID: 23740873EE6F76A1
4 changed files with 431 additions and 198 deletions

View file

@ -0,0 +1,54 @@
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use App\Models\ImportPost;
use App\Jobs\ImportPipeline\ImportMediaToCloudPipeline;
use function Laravel\Prompts\progress;
/**
 * Console command that queues Instagram-import posts whose media has not yet
 * been uploaded to cloud storage, dispatching one ImportMediaToCloudPipeline
 * job per post on the "low" queue.
 */
class ImportUploadMediaToCloudStorage extends Command
{
    /**
     * The name and signature of the console command.
     *
     * The --limit option caps how many pending import posts are queued per run.
     *
     * @var string
     */
    protected $signature = 'app:import-upload-media-to-cloud-storage {--limit=500}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Migrate media imported from Instagram to S3 cloud storage.';

    /**
     * Execute the console command.
     *
     * Aborts when either the IG-import cloud storage flag or the global cloud
     * storage flag is disabled. Otherwise fetches up to --limit pending posts
     * and dispatches a migration job for each one.
     *
     * @return void
     */
    public function handle()
    {
        if(
            (bool) config('import.instagram.storage.cloud.enabled') === false ||
            (bool) config_cache('pixelfed.cloud_storage') === false
        ) {
            $this->error('Aborted. Cloud storage is not enabled for IG imports.');
            return;
        }

        // Console options arrive as strings; normalise and validate before use.
        $limit = (int) $this->option('limit');
        if($limit < 1) {
            $this->error('Aborted. The --limit option must be a positive integer.');
            return;
        }

        // The pipeline job returns immediately for posts without a status_id,
        // so exclude them here; otherwise they would consume the limit on
        // every run without ever being migrated.
        $posts = ImportPost::whereUploadedToS3(false)
            ->whereNotNull('status_id')
            ->take($limit)
            ->get();

        if($posts->isEmpty()) {
            $this->info('No import media pending migration.');
            return;
        }

        // Size the progress bar by the number of posts actually found rather
        // than the requested limit, so it completes when fewer are pending.
        $progress = progress(label: 'Migrating import media', steps: $posts->count());
        $progress->start();

        foreach($posts as $post) {
            ImportMediaToCloudPipeline::dispatch($post)->onQueue('low');
            $progress->advance();
        }

        $progress->finish();
    }
}

View file

@ -0,0 +1,124 @@
<?php
namespace App\Jobs\ImportPipeline;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Contracts\Queue\ShouldBeUniqueUntilProcessing;
use App\Models\ImportPost;
use App\Media;
use App\Services\MediaStorageService;
use Illuminate\Support\Facades\Storage;
/**
 * Queued job that moves every media attachment belonging to one imported
 * post from local storage to cloud storage, then flags the import post as
 * uploaded.
 */
class ImportMediaToCloudPipeline implements ShouldQueue, ShouldBeUniqueUntilProcessing
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * The import post whose media should be migrated.
     *
     * @var \App\Models\ImportPost
     */
    protected $importPost;

    public $timeout = 900;
    public $tries = 3;
    public $maxExceptions = 1;
    public $failOnTimeout = true;

    /**
     * The number of seconds after which the job's unique lock will be released.
     *
     * @var int
     */
    public $uniqueFor = 3600;

    /**
     * Delete the job if its models no longer exist.
     *
     * @var bool
     */
    public $deleteWhenMissingModels = true;

    /**
     * Create a new job instance.
     */
    public function __construct(ImportPost $importPost)
    {
        $this->importPost = $importPost;
    }

    /**
     * Get the unique ID for the job.
     */
    public function uniqueId(): string
    {
        return 'import-media-to-cloud-pipeline:ip-id:' . $this->importPost->id;
    }

    /**
     * Get the middleware the job should pass through.
     *
     * Uses a WithoutOverlapping lock keyed on the import post id so that two
     * jobs for the same post do not run concurrently.
     *
     * @return array<int, object>
     */
    public function middleware(): array
    {
        return [(new WithoutOverlapping("import-media-to-cloud-pipeline:ip-id:{$this->importPost->id}"))->shared()->dontRelease()];
    }

    /**
     * Execute the job.
     *
     * Does nothing when the post has no status yet, is already flagged as
     * uploaded, or cloud storage is disabled. Otherwise every media row of
     * the status is moved, and only then is the post flagged as uploaded.
     */
    public function handle(): void
    {
        $ip = $this->importPost;

        if(
            $ip->status_id === null ||
            // Cast first: without a boolean cast on the model the attribute
            // may come back as an integer and never be identical to true.
            (bool) $ip->uploaded_to_s3 === true ||
            (bool) config_cache('pixelfed.cloud_storage') === false
        ) {
            return;
        }

        // Re-read the post once; it may have been removed since the job was queued.
        $importPost = ImportPost::find($ip->id);
        if(!$importPost) {
            return;
        }

        $media = Media::whereStatusId($ip->status_id)->get();

        foreach($media as $mediaPart) {
            $this->handleMedia($mediaPart);
        }

        // Flag the post only after every attachment has been processed. If a
        // move throws part-way through, the flag stays false and a retry can
        // pick up the remaining files instead of skipping them. A post with
        // no media rows is flagged as well so it is not queued again.
        $importPost->uploaded_to_s3 = true;
        $importPost->save();
    }

    /**
     * Move a single media row to cloud storage and remove the local original
     * once the move reports success.
     *
     * MediaStorageService::move() returns 'success', 'invalid file', or
     * nothing; only 'success' triggers deletion of the local file.
     *
     * @param  \App\Media  $media
     * @return void
     */
    protected function handleMedia($media)
    {
        $res = MediaStorageService::move($media);

        if($res === 'success') {
            Storage::disk('local')->delete($media->media_path);
        }
    }
}

View file

@ -21,28 +21,40 @@ use App\Jobs\AvatarPipeline\AvatarStorageCleanup;
class MediaStorageService { class MediaStorageService {
public static function store(Media $media) public static function store(Media $media)
{ {
if(config_cache('pixelfed.cloud_storage') == true) { if(config_cache('pixelfed.cloud_storage') == true) {
(new self())->cloudStore($media); (new self())->cloudStore($media);
} }
return; return;
} }
public static function avatar($avatar, $local = false, $skipRecentCheck = false) public static function move(Media $media)
{ {
return (new self())->fetchAvatar($avatar, $local, $skipRecentCheck); if($media->remote_media) {
} return;
}
public static function head($url) if(config_cache('pixelfed.cloud_storage') == true) {
{ return (new self())->cloudMove($media);
$c = new Client(); }
try { return;
$r = $c->request('HEAD', $url); }
} catch (RequestException $e) {
return false; public static function avatar($avatar, $local = false, $skipRecentCheck = false)
} {
return (new self())->fetchAvatar($avatar, $local, $skipRecentCheck);
}
public static function head($url)
{
$c = new Client();
try {
$r = $c->request('HEAD', $url);
} catch (RequestException $e) {
return false;
}
$h = Arr::mapWithKeys($r->getHeaders(), function($item, $key) { $h = Arr::mapWithKeys($r->getHeaders(), function($item, $key) {
return [strtolower($key) => last($item)]; return [strtolower($key) => last($item)];
@ -55,224 +67,261 @@ class MediaStorageService {
$len = (int) $h['content-length']; $len = (int) $h['content-length'];
$mime = $h['content-type']; $mime = $h['content-type'];
if($len < 10 || $len > ((config_cache('pixelfed.max_photo_size') * 1000))) { if($len < 10 || $len > ((config_cache('pixelfed.max_photo_size') * 1000))) {
return false; return false;
} }
return [ return [
'length' => $len, 'length' => $len,
'mime' => $mime 'mime' => $mime
]; ];
} }
protected function cloudStore($media) protected function cloudStore($media)
{ {
if($media->remote_media == true) { if($media->remote_media == true) {
if(config('media.storage.remote.cloud')) { if(config('media.storage.remote.cloud')) {
(new self())->remoteToCloud($media); (new self())->remoteToCloud($media);
} }
} else { } else {
(new self())->localToCloud($media); (new self())->localToCloud($media);
} }
} }
protected function localToCloud($media) protected function localToCloud($media)
{ {
$path = storage_path('app/'.$media->media_path); $path = storage_path('app/'.$media->media_path);
$thumb = storage_path('app/'.$media->thumbnail_path); $thumb = storage_path('app/'.$media->thumbnail_path);
$p = explode('/', $media->media_path); $p = explode('/', $media->media_path);
$name = array_pop($p); $name = array_pop($p);
$pt = explode('/', $media->thumbnail_path); $pt = explode('/', $media->thumbnail_path);
$thumbname = array_pop($pt); $thumbname = array_pop($pt);
$storagePath = implode('/', $p); $storagePath = implode('/', $p);
$url = ResilientMediaStorageService::store($storagePath, $path, $name); $url = ResilientMediaStorageService::store($storagePath, $path, $name);
if($thumb) { if($thumb) {
$thumbUrl = ResilientMediaStorageService::store($storagePath, $thumb, $thumbname); $thumbUrl = ResilientMediaStorageService::store($storagePath, $thumb, $thumbname);
$media->thumbnail_url = $thumbUrl; $media->thumbnail_url = $thumbUrl;
} }
$media->cdn_url = $url; $media->cdn_url = $url;
$media->optimized_url = $url; $media->optimized_url = $url;
$media->replicated_at = now(); $media->replicated_at = now();
$media->save(); $media->save();
if($media->status_id) { if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id); Cache::forget('status:transformer:media:attachments:' . $media->status_id);
MediaService::del($media->status_id); MediaService::del($media->status_id);
StatusService::del($media->status_id, false); StatusService::del($media->status_id, false);
} }
} }
protected function remoteToCloud($media) protected function remoteToCloud($media)
{ {
$url = $media->remote_url; $url = $media->remote_url;
if(!Helpers::validateUrl($url)) { if(!Helpers::validateUrl($url)) {
return; return;
} }
$head = $this->head($media->remote_url); $head = $this->head($media->remote_url);
if(!$head) { if(!$head) {
return; return;
} }
$mimes = [ $mimes = [
'image/jpeg', 'image/jpeg',
'image/png', 'image/png',
'video/mp4' 'video/mp4'
]; ];
$mime = $head['mime']; $mime = $head['mime'];
$max_size = (int) config_cache('pixelfed.max_photo_size') * 1000; $max_size = (int) config_cache('pixelfed.max_photo_size') * 1000;
$media->size = $head['length']; $media->size = $head['length'];
$media->remote_media = true; $media->remote_media = true;
$media->save(); $media->save();
if(!in_array($mime, $mimes)) { if(!in_array($mime, $mimes)) {
return; return;
} }
if($head['length'] >= $max_size) { if($head['length'] >= $max_size) {
return; return;
} }
switch ($mime) { switch ($mime) {
case 'image/png': case 'image/png':
$ext = '.png'; $ext = '.png';
break; break;
case 'image/gif': case 'image/gif':
$ext = '.gif'; $ext = '.gif';
break; break;
case 'image/jpeg': case 'image/jpeg':
$ext = '.jpg'; $ext = '.jpg';
break; break;
case 'video/mp4': case 'video/mp4':
$ext = '.mp4'; $ext = '.mp4';
break; break;
} }
$base = MediaPathService::get($media->profile); $base = MediaPathService::get($media->profile);
$path = Str::random(40) . $ext; $path = Str::random(40) . $ext;
$tmpBase = storage_path('app/remcache/'); $tmpBase = storage_path('app/remcache/');
$tmpPath = $media->profile_id . '-' . $path; $tmpPath = $media->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath; $tmpName = $tmpBase . $tmpPath;
$data = file_get_contents($url, false, null, 0, $head['length']); $data = file_get_contents($url, false, null, 0, $head['length']);
file_put_contents($tmpName, $data); file_put_contents($tmpName, $data);
$hash = hash_file('sha256', $tmpName); $hash = hash_file('sha256', $tmpName);
$disk = Storage::disk(config('filesystems.cloud')); $disk = Storage::disk(config('filesystems.cloud'));
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public'); $file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file); $permalink = $disk->url($file);
$media->media_path = $file; $media->media_path = $file;
$media->cdn_url = $permalink; $media->cdn_url = $permalink;
$media->original_sha256 = $hash; $media->original_sha256 = $hash;
$media->replicated_at = now(); $media->replicated_at = now();
$media->save(); $media->save();
if($media->status_id) { if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id); Cache::forget('status:transformer:media:attachments:' . $media->status_id);
} }
unlink($tmpName); unlink($tmpName);
} }
protected function fetchAvatar($avatar, $local = false, $skipRecentCheck = false) protected function fetchAvatar($avatar, $local = false, $skipRecentCheck = false)
{ {
$queue = random_int(1, 15) > 5 ? 'mmo' : 'low'; $queue = random_int(1, 15) > 5 ? 'mmo' : 'low';
$url = $avatar->remote_url; $url = $avatar->remote_url;
$driver = $local ? 'local' : config('filesystems.cloud'); $driver = $local ? 'local' : config('filesystems.cloud');
if(empty($url) || Helpers::validateUrl($url) == false) { if(empty($url) || Helpers::validateUrl($url) == false) {
return; return;
} }
$head = $this->head($url); $head = $this->head($url);
if($head == false) { if($head == false) {
return; return;
} }
$mimes = [ $mimes = [
'application/octet-stream', 'application/octet-stream',
'image/jpeg', 'image/jpeg',
'image/png', 'image/png',
]; ];
$mime = $head['mime']; $mime = $head['mime'];
$max_size = (int) config('pixelfed.max_avatar_size') * 1000; $max_size = (int) config('pixelfed.max_avatar_size') * 1000;
if(!$skipRecentCheck) { if(!$skipRecentCheck) {
if($avatar->last_fetched_at && $avatar->last_fetched_at->gt(now()->subMonths(3))) { if($avatar->last_fetched_at && $avatar->last_fetched_at->gt(now()->subMonths(3))) {
return; return;
} }
} }
Cache::forget('avatar:' . $avatar->profile_id); Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id); AccountService::del($avatar->profile_id);
// handle pleroma edge case // handle pleroma edge case
if(Str::endsWith($mime, '; charset=utf-8')) { if(Str::endsWith($mime, '; charset=utf-8')) {
$mime = str_replace('; charset=utf-8', '', $mime); $mime = str_replace('; charset=utf-8', '', $mime);
} }
if(!in_array($mime, $mimes)) { if(!in_array($mime, $mimes)) {
return; return;
} }
if($head['length'] >= $max_size) { if($head['length'] >= $max_size) {
return; return;
} }
$base = ($local ? 'public/cache/' : 'cache/') . 'avatars/' . $avatar->profile_id; $base = ($local ? 'public/cache/' : 'cache/') . 'avatars/' . $avatar->profile_id;
$ext = $head['mime'] == 'image/jpeg' ? 'jpg' : 'png'; $ext = $head['mime'] == 'image/jpeg' ? 'jpg' : 'png';
$path = 'avatar_' . strtolower(Str::random(random_int(3,6))) . '.' . $ext; $path = 'avatar_' . strtolower(Str::random(random_int(3,6))) . '.' . $ext;
$tmpBase = storage_path('app/remcache/'); $tmpBase = storage_path('app/remcache/');
$tmpPath = 'avatar_' . $avatar->profile_id . '-' . $path; $tmpPath = 'avatar_' . $avatar->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath; $tmpName = $tmpBase . $tmpPath;
$data = @file_get_contents($url, false, null, 0, $head['length']); $data = @file_get_contents($url, false, null, 0, $head['length']);
if(!$data) { if(!$data) {
return; return;
} }
file_put_contents($tmpName, $data); file_put_contents($tmpName, $data);
$mimeCheck = Storage::mimeType('remcache/' . $tmpPath); $mimeCheck = Storage::mimeType('remcache/' . $tmpPath);
if(!$mimeCheck || !in_array($mimeCheck, ['image/png', 'image/jpeg'])) { if(!$mimeCheck || !in_array($mimeCheck, ['image/png', 'image/jpeg'])) {
$avatar->last_fetched_at = now(); $avatar->last_fetched_at = now();
$avatar->save(); $avatar->save();
unlink($tmpName); unlink($tmpName);
return; return;
} }
$disk = Storage::disk($driver); $disk = Storage::disk($driver);
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public'); $file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file); $permalink = $disk->url($file);
$avatar->media_path = $base . '/' . $path; $avatar->media_path = $base . '/' . $path;
$avatar->is_remote = true; $avatar->is_remote = true;
$avatar->cdn_url = $local ? config('app.url') . $permalink : $permalink; $avatar->cdn_url = $local ? config('app.url') . $permalink : $permalink;
$avatar->size = $head['length']; $avatar->size = $head['length'];
$avatar->change_count = $avatar->change_count + 1; $avatar->change_count = $avatar->change_count + 1;
$avatar->last_fetched_at = now(); $avatar->last_fetched_at = now();
$avatar->save(); $avatar->save();
Cache::forget('avatar:' . $avatar->profile_id); Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id); AccountService::del($avatar->profile_id);
AvatarStorageCleanup::dispatch($avatar)->onQueue($queue)->delay(now()->addMinutes(random_int(3, 15))); AvatarStorageCleanup::dispatch($avatar)->onQueue($queue)->delay(now()->addMinutes(random_int(3, 15)));
unlink($tmpName); unlink($tmpName);
} }
public static function delete(Media $media, $confirm = false) public static function delete(Media $media, $confirm = false)
{ {
if(!$confirm) { if(!$confirm) {
return; return;
} }
MediaDeletePipeline::dispatch($media)->onQueue('mmo'); MediaDeletePipeline::dispatch($media)->onQueue('mmo');
} }
/**
 * Upload a locally stored media file (and its thumbnail, when one exists
 * on disk) to cloud storage and record the resulting URLs on the model.
 *
 * @param  \App\Media  $media
 * @return string 'invalid file' when the local media file is missing,
 *                'success' once the URLs have been saved.
 */
protected function cloudMove($media)
{
    // An empty path can never point at a real file; treat it like a missing one.
    if(empty($media->media_path) || !Storage::exists($media->media_path)) {
        return 'invalid file';
    }

    $path = storage_path('app/'.$media->media_path);
    $p = explode('/', $media->media_path);
    $name = array_pop($p);
    $storagePath = implode('/', $p);

    $url = ResilientMediaStorageService::store($storagePath, $path, $name);

    // Apply the same existence check to the thumbnail as to the main file,
    // so a stale thumbnail_path does not abort the whole move.
    if($media->thumbnail_path && Storage::exists($media->thumbnail_path)) {
        $thumb = storage_path('app/'.$media->thumbnail_path);
        $pt = explode('/', $media->thumbnail_path);
        $thumbname = array_pop($pt);
        // The thumbnail is stored in the same cloud directory as the media file.
        $media->thumbnail_url = ResilientMediaStorageService::store($storagePath, $thumb, $thumbname);
    }

    $media->cdn_url = $url;
    $media->optimized_url = $url;
    $media->replicated_at = now();
    $media->save();

    // Drop cached representations of the parent status so the new URLs are served.
    if($media->status_id) {
        Cache::forget('status:transformer:media:attachments:' . $media->status_id);
        MediaService::del($media->status_id);
        StatusService::del($media->status_id, false);
    }

    return 'success';
}
} }

View file

@ -39,6 +39,12 @@ return [
// Limit to specific user ids, in comma separated format // Limit to specific user ids, in comma separated format
'user_ids' => env('PF_IMPORT_IG_PERM_ONLY_USER_IDS', null), 'user_ids' => env('PF_IMPORT_IG_PERM_ONLY_USER_IDS', null),
],
'storage' => [
'cloud' => [
'enabled' => env('PF_IMPORT_IG_CLOUD_STORAGE', env('PF_ENABLE_CLOUD', false)),
]
] ]
] ]
]; ];