fix: Instagram import broken UTF8 characters

This commit is contained in:
paule 2023-11-01 05:29:10 +01:00
parent da510089e2
commit 950baef58b

View file

@ -348,8 +348,16 @@
}, 500); }, 500);
}, },
// Facebook and Instagram are encoding UTF8 characters in a weird way in their json
// here is a good explanation what's going wrong https://sorashi.github.io/fix-facebook-json-archive-encoding
fixFacebookEncoding(string) {
const replaced = string.replace(/\\u00([a-f0-9]{2})/g, (x) => String.fromCharCode(parseInt(x.slice(2), 16)));
const buffer = Array.from(replaced, (c) => c.charCodeAt(0));
return new TextDecoder().decode(new Uint8Array(buffer));
},
filterPostMeta(media) { filterPostMeta(media) {
let json = JSON.parse(media); let json = JSON.parse(this.fixFacebookEncoding(media));
let res = json.filter(j => { let res = json.filter(j => {
let ids = j.media.map(m => m.uri).filter(m => { let ids = j.media.map(m => m.uri).filter(m => {
if(this.config.allow_video_posts == true) { if(this.config.allow_video_posts == true) {
@ -396,12 +404,14 @@
this.filterPostMeta(media); this.filterPostMeta(media);
let imgs = await Promise.all(entries.filter(entry => { let imgs = await Promise.all(entries.filter(entry => {
return entry.filename.startsWith('media/posts/') && (entry.filename.endsWith('.png') || entry.filename.endsWith('.jpg') || entry.filename.endsWith('.mp4')); return (entry.filename.startsWith('media/posts/') || entry.filename.startsWith('media/other/')) && (entry.filename.endsWith('.png') || entry.filename.endsWith('.jpg') || entry.filename.endsWith('.mp4'));
}) })
.map(async entry => { .map(async entry => {
if( if(
entry.filename.startsWith('media/posts/') &&
( (
entry.filename.startsWith('media/posts/') ||
entry.filename.startsWith('media/other/')
) && (
entry.filename.endsWith('.png') || entry.filename.endsWith('.png') ||
entry.filename.endsWith('.jpg') || entry.filename.endsWith('.jpg') ||
entry.filename.endsWith('.mp4') entry.filename.endsWith('.mp4')