filter engine tags

This commit is contained in:
yggverse 2026-03-19 05:50:29 +02:00
parent 8539bffeb9
commit 11dcc0b2b4
3 changed files with 22 additions and 2 deletions

1
Cargo.lock generated
View file

@ -273,6 +273,7 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"html-to-markdown-rs", "html-to-markdown-rs",
"regex",
"rusqlite", "rusqlite",
] ]

View file

@ -14,4 +14,5 @@ anyhow = "1.0.102"
chrono = "0.4.44" chrono = "0.4.44"
clap = { version = "4.6.0", features = ["derive"] } clap = { version = "4.6.0", features = ["derive"] }
html-to-markdown-rs = "2.28.2" html-to-markdown-rs = "2.28.2"
regex = "1.12.3"
rusqlite = { version = "0.39.0", features = ["chrono"]} rusqlite = { version = "0.39.0", features = ["chrono"]}

View file

@ -109,7 +109,7 @@ fn main() -> Result<()> {
let mut content = Vec::new(); let mut content = Vec::new();
for post in discussion.posts { for post in discussion.posts {
content.push(format!( content.push(format!(
"@{} / {}{}", "_@{} / {}{}_",
users.get(&post.user_id).unwrap().username, users.get(&post.user_id).unwrap().username,
post.created_at, post.created_at,
post.edited_at post.edited_at
@ -117,7 +117,7 @@ fn main() -> Result<()> {
.unwrap_or_default() .unwrap_or_default()
)); ));
content.push("---".into()); content.push("---".into());
content.push(convert(&post.content, None)?) content.push(convert(&strip_tags(&post.content), None)?)
} }
content.join("\n") content.join("\n")
}); });
@ -129,3 +129,21 @@ fn main() -> Result<()> {
Ok(()) Ok(())
} }
/// Rewrites engine-specific markup in `data` into plain HTML tags so the
/// result can be passed to the HTML-to-Markdown converter.
///
/// Steps, in order:
/// 1. every `<s>…</s>` span (non-empty, containing no `<`) is removed;
/// 2. every `<e>…</e>` span of the same shape is removed from that result;
/// 3. `<C>` / `</C>` become `<code>` / `</code>`;
/// 4. `<LIST` / `</LIST>` become `<ul` / `</ul>`;
/// 5. `<URL` / `</URL>` become `<a` / `</a>`, and ` url=` becomes ` href=`;
/// 6. the `<r>` / `</r>` wrapper tags are dropped.
///
/// NOTE(review): the tag names look like a forum engine's stored post format
/// (the commit calls them "engine tags") — confirm against the data source.
///
/// # Panics
///
/// Panics on first use only if one of the hard-coded patterns failed to
/// compile, which would be a programming error.
fn strip_tags(data: &str) -> String {
    use regex::Regex;
    use std::sync::LazyLock;

    // Compiled once and reused: this function is called for every post, and
    // building a `Regex` is far more expensive than running it.
    static START_MARKER: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"<s>[^<]+</s>").expect("hard-coded pattern is valid"));
    static END_MARKER: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"<e>[^<]+</e>").expect("hard-coded pattern is valid"));

    // The two passes stay sequential on purpose: removing an `<s>` span can
    // expose a new `<e>` match, which a single combined pattern would miss.
    let without_start = START_MARKER.replace_all(data, "");
    END_MARKER
        .replace_all(&without_start, "")
        // Match the exact `C` tag only (bare, or followed by attributes).
        // A plain "<C" prefix replace would also mangle `<CODE>`, `<CENTER>`,
        // `<COLOR …>` into `<codeODE>`, `<codeENTER>`, … while leaving their
        // closing tags untouched.
        .replace("<C>", "<code>")
        .replace("<C ", "<code ")
        .replace("</C>", "</code>")
        .replace("<LIST", "<ul")
        .replace("</LIST>", "</ul>")
        .replace("<URL", "<a")
        .replace("</URL>", "</a>")
        .replace(" url=", " href=")
        .replace("<r>", "")
        .replace("</r>", "")
}