Add S3 IG Import Media Storage

This commit is contained in:
Daniel Supernault 2024-02-02 02:29:33 -07:00
parent 5b7111c56f
commit 622e9cee97
No known key found for this signature in database
GPG key ID: 23740873EE6F76A1
4 changed files with 431 additions and 198 deletions

View file

@ -0,0 +1,54 @@
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use App\Models\ImportPost;
use App\Jobs\ImportPipeline\ImportMediaToCloudPipeline;
use function Laravel\Prompts\progress;
/**
 * Artisan command that queues ImportMediaToCloudPipeline jobs for imported
 * posts that have not yet been flagged as uploaded to S3.
 */
class ImportUploadMediaToCloudStorage extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:import-upload-media-to-cloud-storage {--limit=500}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Migrate media imported from Instagram to S3 cloud storage.';

    /**
     * Execute the console command.
     *
     * Aborts when cloud storage is disabled for imports or globally, or when
     * --limit is not a positive integer. Otherwise dispatches one job per
     * pending import post on the "low" queue.
     */
    public function handle()
    {
        if(
            (bool) config('import.instagram.storage.cloud.enabled') === false ||
            (bool) config_cache('pixelfed.cloud_storage') === false
        ) {
            $this->error('Aborted. Cloud storage is not enabled for IG imports.');
            return;
        }

        // Console options arrive as strings; normalise before using as a row limit.
        $limit = (int) $this->option('limit');
        if($limit < 1) {
            $this->error('Aborted. The --limit option must be a positive integer.');
            return;
        }

        // The pipeline job returns early for posts without a status_id and never
        // flags them, so selecting them here would only re-queue no-op jobs and
        // could crowd real work out of the limit window on every run.
        $posts = ImportPost::whereUploadedToS3(false)
            ->whereNotNull('status_id')
            ->take($limit)
            ->get();

        if($posts->isEmpty()) {
            $this->info('No imported media is pending migration.');
            return;
        }

        // Size the bar to the rows actually fetched so it can reach 100%.
        $progress = progress(label: 'Migrating import media', steps: $posts->count());
        $progress->start();

        foreach($posts as $post) {
            ImportMediaToCloudPipeline::dispatch($post)->onQueue('low');
            $progress->advance();
        }

        $progress->finish();
    }
}

View file

@ -0,0 +1,124 @@
<?php
namespace App\Jobs\ImportPipeline;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Contracts\Queue\ShouldBeUniqueUntilProcessing;
use App\Models\ImportPost;
use App\Media;
use App\Services\MediaStorageService;
use Illuminate\Support\Facades\Storage;
/**
 * Queue job that moves the local media attached to an imported post's status
 * to cloud storage, then flags the import post as uploaded.
 */
class ImportMediaToCloudPipeline implements ShouldQueue, ShouldBeUniqueUntilProcessing
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    protected $importPost;

    public $timeout = 900;
    public $tries = 3;
    public $maxExceptions = 1;
    public $failOnTimeout = true;

    /**
     * The number of seconds after which the job's unique lock will be released.
     *
     * @var int
     */
    public $uniqueFor = 3600;

    /**
     * Delete the job if its models no longer exist.
     *
     * @var bool
     */
    public $deleteWhenMissingModels = true;

    /**
     * Create a new job instance.
     */
    public function __construct(ImportPost $importPost)
    {
        $this->importPost = $importPost;
    }

    /**
     * Get the unique ID for the job.
     */
    public function uniqueId(): string
    {
        return $this->lockKey();
    }

    /**
     * Get the middleware the job should pass through.
     *
     * @return array<int, object>
     */
    public function middleware(): array
    {
        return [(new WithoutOverlapping($this->lockKey()))->shared()->dontRelease()];
    }

    /**
     * Execute the job.
     *
     * Skips posts that have no status yet, are already flagged as uploaded,
     * or when cloud storage is disabled. The post is flagged only after every
     * media item has been handled, so an exception part-way through leaves it
     * pending instead of hiding the media that were never migrated.
     */
    public function handle(): void
    {
        $ip = $this->importPost;

        if(
            $ip->status_id === null ||
            // The raw attribute may be 1/"1" rather than true when the model
            // declares no boolean cast, so normalise before comparing.
            (bool) $ip->uploaded_to_s3 === true ||
            (bool) config_cache('pixelfed.cloud_storage') === false
        ) {
            return;
        }

        $media = Media::whereStatusId($ip->status_id)->get();

        foreach($media as $mediaPart) {
            $this->handleMedia($mediaPart);
        }

        // Also reached when the status has no media: nothing to migrate, so
        // the post is flagged to keep it from being selected again.
        $this->markAsUploaded();
    }

    /**
     * Move a single media item to cloud storage and remove the local copy
     * when the move reports success.
     *
     * @param  \App\Media  $media
     * @return void
     */
    protected function handleMedia($media)
    {
        // Skip the move when the import post has been removed in the meantime.
        if(!ImportPost::find($this->importPost->id)) {
            return;
        }

        $res = MediaStorageService::move($media);

        // Any other result (empty or 'invalid file') leaves the local file untouched.
        if($res === 'success') {
            Storage::disk('local')->delete($media->media_path);
        }
    }

    /**
     * Flag a freshly loaded copy of the import post as uploaded.
     * Does nothing when the post no longer exists.
     */
    protected function markAsUploaded(): void
    {
        $importPost = ImportPost::find($this->importPost->id);

        if(!$importPost) {
            return;
        }

        $importPost->uploaded_to_s3 = true;
        $importPost->save();
    }

    /**
     * Build the key shared by the unique-job lock and the overlap middleware.
     */
    private function lockKey(): string
    {
        return 'import-media-to-cloud-pipeline:ip-id:' . $this->importPost->id;
    }
}

View file

@ -21,28 +21,40 @@ use App\Jobs\AvatarPipeline\AvatarStorageCleanup;
class MediaStorageService {
public static function store(Media $media)
{
if(config_cache('pixelfed.cloud_storage') == true) {
(new self())->cloudStore($media);
}
public static function store(Media $media)
{
if(config_cache('pixelfed.cloud_storage') == true) {
(new self())->cloudStore($media);
}
return;
}
return;
}
public static function avatar($avatar, $local = false, $skipRecentCheck = false)
{
return (new self())->fetchAvatar($avatar, $local, $skipRecentCheck);
}
public static function move(Media $media)
{
if($media->remote_media) {
return;
}
public static function head($url)
{
$c = new Client();
try {
$r = $c->request('HEAD', $url);
} catch (RequestException $e) {
return false;
}
if(config_cache('pixelfed.cloud_storage') == true) {
return (new self())->cloudMove($media);
}
return;
}
public static function avatar($avatar, $local = false, $skipRecentCheck = false)
{
return (new self())->fetchAvatar($avatar, $local, $skipRecentCheck);
}
public static function head($url)
{
$c = new Client();
try {
$r = $c->request('HEAD', $url);
} catch (RequestException $e) {
return false;
}
$h = Arr::mapWithKeys($r->getHeaders(), function($item, $key) {
return [strtolower($key) => last($item)];
@ -55,224 +67,261 @@ class MediaStorageService {
$len = (int) $h['content-length'];
$mime = $h['content-type'];
if($len < 10 || $len > ((config_cache('pixelfed.max_photo_size') * 1000))) {
return false;
}
if($len < 10 || $len > ((config_cache('pixelfed.max_photo_size') * 1000))) {
return false;
}
return [
'length' => $len,
'mime' => $mime
];
}
return [
'length' => $len,
'mime' => $mime
];
}
protected function cloudStore($media)
{
if($media->remote_media == true) {
if(config('media.storage.remote.cloud')) {
(new self())->remoteToCloud($media);
}
} else {
(new self())->localToCloud($media);
}
}
protected function cloudStore($media)
{
if($media->remote_media == true) {
if(config('media.storage.remote.cloud')) {
(new self())->remoteToCloud($media);
}
} else {
(new self())->localToCloud($media);
}
}
protected function localToCloud($media)
{
$path = storage_path('app/'.$media->media_path);
$thumb = storage_path('app/'.$media->thumbnail_path);
protected function localToCloud($media)
{
$path = storage_path('app/'.$media->media_path);
$thumb = storage_path('app/'.$media->thumbnail_path);
$p = explode('/', $media->media_path);
$name = array_pop($p);
$pt = explode('/', $media->thumbnail_path);
$thumbname = array_pop($pt);
$storagePath = implode('/', $p);
$p = explode('/', $media->media_path);
$name = array_pop($p);
$pt = explode('/', $media->thumbnail_path);
$thumbname = array_pop($pt);
$storagePath = implode('/', $p);
$url = ResilientMediaStorageService::store($storagePath, $path, $name);
if($thumb) {
$thumbUrl = ResilientMediaStorageService::store($storagePath, $thumb, $thumbname);
$media->thumbnail_url = $thumbUrl;
}
$media->cdn_url = $url;
$media->optimized_url = $url;
$media->replicated_at = now();
$media->save();
if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id);
MediaService::del($media->status_id);
StatusService::del($media->status_id, false);
}
}
$url = ResilientMediaStorageService::store($storagePath, $path, $name);
if($thumb) {
$thumbUrl = ResilientMediaStorageService::store($storagePath, $thumb, $thumbname);
$media->thumbnail_url = $thumbUrl;
}
$media->cdn_url = $url;
$media->optimized_url = $url;
$media->replicated_at = now();
$media->save();
if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id);
MediaService::del($media->status_id);
StatusService::del($media->status_id, false);
}
}
protected function remoteToCloud($media)
{
$url = $media->remote_url;
protected function remoteToCloud($media)
{
$url = $media->remote_url;
if(!Helpers::validateUrl($url)) {
return;
}
if(!Helpers::validateUrl($url)) {
return;
}
$head = $this->head($media->remote_url);
$head = $this->head($media->remote_url);
if(!$head) {
return;
}
if(!$head) {
return;
}
$mimes = [
'image/jpeg',
'image/png',
'video/mp4'
];
$mimes = [
'image/jpeg',
'image/png',
'video/mp4'
];
$mime = $head['mime'];
$max_size = (int) config_cache('pixelfed.max_photo_size') * 1000;
$media->size = $head['length'];
$media->remote_media = true;
$media->save();
$mime = $head['mime'];
$max_size = (int) config_cache('pixelfed.max_photo_size') * 1000;
$media->size = $head['length'];
$media->remote_media = true;
$media->save();
if(!in_array($mime, $mimes)) {
return;
}
if(!in_array($mime, $mimes)) {
return;
}
if($head['length'] >= $max_size) {
return;
}
if($head['length'] >= $max_size) {
return;
}
switch ($mime) {
case 'image/png':
$ext = '.png';
break;
switch ($mime) {
case 'image/png':
$ext = '.png';
break;
case 'image/gif':
$ext = '.gif';
break;
case 'image/gif':
$ext = '.gif';
break;
case 'image/jpeg':
$ext = '.jpg';
break;
case 'image/jpeg':
$ext = '.jpg';
break;
case 'video/mp4':
$ext = '.mp4';
break;
}
case 'video/mp4':
$ext = '.mp4';
break;
}
$base = MediaPathService::get($media->profile);
$path = Str::random(40) . $ext;
$tmpBase = storage_path('app/remcache/');
$tmpPath = $media->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath;
$data = file_get_contents($url, false, null, 0, $head['length']);
file_put_contents($tmpName, $data);
$hash = hash_file('sha256', $tmpName);
$base = MediaPathService::get($media->profile);
$path = Str::random(40) . $ext;
$tmpBase = storage_path('app/remcache/');
$tmpPath = $media->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath;
$data = file_get_contents($url, false, null, 0, $head['length']);
file_put_contents($tmpName, $data);
$hash = hash_file('sha256', $tmpName);
$disk = Storage::disk(config('filesystems.cloud'));
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file);
$disk = Storage::disk(config('filesystems.cloud'));
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file);
$media->media_path = $file;
$media->cdn_url = $permalink;
$media->original_sha256 = $hash;
$media->replicated_at = now();
$media->save();
$media->media_path = $file;
$media->cdn_url = $permalink;
$media->original_sha256 = $hash;
$media->replicated_at = now();
$media->save();
if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id);
}
if($media->status_id) {
Cache::forget('status:transformer:media:attachments:' . $media->status_id);
}
unlink($tmpName);
}
unlink($tmpName);
}
protected function fetchAvatar($avatar, $local = false, $skipRecentCheck = false)
{
$queue = random_int(1, 15) > 5 ? 'mmo' : 'low';
$url = $avatar->remote_url;
$driver = $local ? 'local' : config('filesystems.cloud');
protected function fetchAvatar($avatar, $local = false, $skipRecentCheck = false)
{
$queue = random_int(1, 15) > 5 ? 'mmo' : 'low';
$url = $avatar->remote_url;
$driver = $local ? 'local' : config('filesystems.cloud');
if(empty($url) || Helpers::validateUrl($url) == false) {
return;
}
if(empty($url) || Helpers::validateUrl($url) == false) {
return;
}
$head = $this->head($url);
$head = $this->head($url);
if($head == false) {
return;
}
if($head == false) {
return;
}
$mimes = [
'application/octet-stream',
'image/jpeg',
'image/png',
];
$mimes = [
'application/octet-stream',
'image/jpeg',
'image/png',
];
$mime = $head['mime'];
$max_size = (int) config('pixelfed.max_avatar_size') * 1000;
$mime = $head['mime'];
$max_size = (int) config('pixelfed.max_avatar_size') * 1000;
if(!$skipRecentCheck) {
if($avatar->last_fetched_at && $avatar->last_fetched_at->gt(now()->subMonths(3))) {
return;
}
}
if(!$skipRecentCheck) {
if($avatar->last_fetched_at && $avatar->last_fetched_at->gt(now()->subMonths(3))) {
return;
}
}
Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id);
Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id);
// handle pleroma edge case
if(Str::endsWith($mime, '; charset=utf-8')) {
$mime = str_replace('; charset=utf-8', '', $mime);
}
// handle pleroma edge case
if(Str::endsWith($mime, '; charset=utf-8')) {
$mime = str_replace('; charset=utf-8', '', $mime);
}
if(!in_array($mime, $mimes)) {
return;
}
if(!in_array($mime, $mimes)) {
return;
}
if($head['length'] >= $max_size) {
return;
}
if($head['length'] >= $max_size) {
return;
}
$base = ($local ? 'public/cache/' : 'cache/') . 'avatars/' . $avatar->profile_id;
$ext = $head['mime'] == 'image/jpeg' ? 'jpg' : 'png';
$path = 'avatar_' . strtolower(Str::random(random_int(3,6))) . '.' . $ext;
$tmpBase = storage_path('app/remcache/');
$tmpPath = 'avatar_' . $avatar->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath;
$data = @file_get_contents($url, false, null, 0, $head['length']);
if(!$data) {
return;
}
file_put_contents($tmpName, $data);
$base = ($local ? 'public/cache/' : 'cache/') . 'avatars/' . $avatar->profile_id;
$ext = $head['mime'] == 'image/jpeg' ? 'jpg' : 'png';
$path = 'avatar_' . strtolower(Str::random(random_int(3,6))) . '.' . $ext;
$tmpBase = storage_path('app/remcache/');
$tmpPath = 'avatar_' . $avatar->profile_id . '-' . $path;
$tmpName = $tmpBase . $tmpPath;
$data = @file_get_contents($url, false, null, 0, $head['length']);
if(!$data) {
return;
}
file_put_contents($tmpName, $data);
$mimeCheck = Storage::mimeType('remcache/' . $tmpPath);
$mimeCheck = Storage::mimeType('remcache/' . $tmpPath);
if(!$mimeCheck || !in_array($mimeCheck, ['image/png', 'image/jpeg'])) {
$avatar->last_fetched_at = now();
$avatar->save();
unlink($tmpName);
return;
}
if(!$mimeCheck || !in_array($mimeCheck, ['image/png', 'image/jpeg'])) {
$avatar->last_fetched_at = now();
$avatar->save();
unlink($tmpName);
return;
}
$disk = Storage::disk($driver);
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file);
$disk = Storage::disk($driver);
$file = $disk->putFileAs($base, new File($tmpName), $path, 'public');
$permalink = $disk->url($file);
$avatar->media_path = $base . '/' . $path;
$avatar->is_remote = true;
$avatar->cdn_url = $local ? config('app.url') . $permalink : $permalink;
$avatar->size = $head['length'];
$avatar->change_count = $avatar->change_count + 1;
$avatar->last_fetched_at = now();
$avatar->save();
$avatar->media_path = $base . '/' . $path;
$avatar->is_remote = true;
$avatar->cdn_url = $local ? config('app.url') . $permalink : $permalink;
$avatar->size = $head['length'];
$avatar->change_count = $avatar->change_count + 1;
$avatar->last_fetched_at = now();
$avatar->save();
Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id);
AvatarStorageCleanup::dispatch($avatar)->onQueue($queue)->delay(now()->addMinutes(random_int(3, 15)));
Cache::forget('avatar:' . $avatar->profile_id);
AccountService::del($avatar->profile_id);
AvatarStorageCleanup::dispatch($avatar)->onQueue($queue)->delay(now()->addMinutes(random_int(3, 15)));
unlink($tmpName);
}
unlink($tmpName);
}
public static function delete(Media $media, $confirm = false)
{
if(!$confirm) {
return;
}
MediaDeletePipeline::dispatch($media)->onQueue('mmo');
}
public static function delete(Media $media, $confirm = false)
{
if(!$confirm) {
return;
}
MediaDeletePipeline::dispatch($media)->onQueue('mmo');
}
/**
 * Send a locally stored media file, plus its thumbnail when a thumbnail path
 * is set, through ResilientMediaStorageService and record the returned URLs
 * on the media model.
 *
 * @param  mixed  $media  Media model; reads media_path, thumbnail_path and status_id.
 * @return string 'invalid file' when Storage cannot find media_path, otherwise 'success'.
 */
protected function cloudMove($media)
{
    if(!Storage::exists($media->media_path)) {
        return 'invalid file';
    }

    // Split the relative media path into its directory and file name.
    $segments = explode('/', $media->media_path);
    $fileName = array_pop($segments);
    $directory = implode('/', $segments);

    $cdnUrl = ResilientMediaStorageService::store(
        $directory,
        storage_path('app/'.$media->media_path),
        $fileName
    );

    if($media->thumbnail_path) {
        // The thumbnail is stored under the media file's directory, using
        // only the last segment of its own path as the file name.
        $thumbSegments = explode('/', $media->thumbnail_path);
        $media->thumbnail_url = ResilientMediaStorageService::store(
            $directory,
            storage_path('app/'.$media->thumbnail_path),
            array_pop($thumbSegments)
        );
    }

    $media->cdn_url = $cdnUrl;
    $media->optimized_url = $cdnUrl;
    $media->replicated_at = now();
    $media->save();

    // Drop cached representations of the owning status so the new URLs are served.
    $statusId = $media->status_id;
    if($statusId) {
        Cache::forget('status:transformer:media:attachments:' . $statusId);
        MediaService::del($statusId);
        StatusService::del($statusId, false);
    }

    return 'success';
}
}

View file

@ -39,6 +39,12 @@ return [
// Limit to specific user ids, in comma separated format
'user_ids' => env('PF_IMPORT_IG_PERM_ONLY_USER_IDS', null),
],
'storage' => [
'cloud' => [
'enabled' => env('PF_IMPORT_IG_CLOUD_STORAGE', env('PF_ENABLE_CLOUD', false)),
]
]
]
];