filter engine tags

This commit is contained in:
yggverse 2026-03-19 05:50:29 +02:00
parent 8539bffeb9
commit 11dcc0b2b4
3 changed files with 22 additions and 2 deletions

1
Cargo.lock generated
View file

@ -273,6 +273,7 @@ dependencies = [
"chrono",
"clap",
"html-to-markdown-rs",
"regex",
"rusqlite",
]

View file

@ -14,4 +14,5 @@ anyhow = "1.0.102"
chrono = "0.4.44"
clap = { version = "4.6.0", features = ["derive"] }
html-to-markdown-rs = "2.28.2"
regex = "1.12.3"
rusqlite = { version = "0.39.0", features = ["chrono"]}

View file

@ -109,7 +109,7 @@ fn main() -> Result<()> {
let mut content = Vec::new();
for post in discussion.posts {
content.push(format!(
"@{} / {}{}",
"_@{} / {}{}_",
users.get(&post.user_id).unwrap().username,
post.created_at,
post.edited_at
@ -117,7 +117,7 @@ fn main() -> Result<()> {
.unwrap_or_default()
));
content.push("---".into());
content.push(convert(&post.content, None)?)
content.push(convert(&strip_tags(&post.content), None)?)
}
content.join("\n")
});
@ -129,3 +129,21 @@ fn main() -> Result<()> {
Ok(())
}
/// Rewrites the engine-specific markup found in a post body into plain HTML
/// tags so the result can be handed to the HTML-to-Markdown converter.
///
/// The input format is presumably the s9e TextFormatter-style storage XML used
/// by the forum engine (`<r>` root, `<s>`/`<e>` markup markers) — TODO confirm
/// against the database schema.
///
/// Transformations, in order:
/// 1. every `<s>…</s>` span (with non-empty content free of `<`) is removed;
/// 2. every `<e>…</e>` span of the same shape is removed from that result;
/// 3. `<C>` → `<code>`, `<LIST>` → `<ul>`, `<URL url=…>` → `<a href=…>`;
/// 4. the `<r>` / `</r>` wrapper is dropped.
///
/// Returns a newly allocated string; `data` is left untouched.
///
/// # Panics
///
/// Only if one of the hard-coded regex patterns is invalid, which would be a
/// programming error.
fn strip_tags(data: &str) -> String {
    use regex::Regex;
    use std::sync::LazyLock;

    // This function runs once per post, so the patterns are compiled a single
    // time and reused instead of being rebuilt on every call.
    static START_MARKUP: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"<s>[^<]+</s>").expect("hard-coded pattern is valid"));
    static END_MARKUP: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"<e>[^<]+</e>").expect("hard-coded pattern is valid"));

    // Two sequential passes (not one alternation): removing an `<s>` span can
    // expose an `<e>` span that only matches afterwards.
    let without_start = START_MARKUP.replace_all(data, "");
    let without_markers = END_MARKUP.replace_all(&without_start, "");

    without_markers
        // Match the `C` tag exactly; a bare "<C" prefix would also rewrite
        // `<CODE …>` into the invalid `<codeODE …>`.
        .replace("<C>", "<code>")
        .replace("<C ", "<code ")
        .replace("</C>", "</code>")
        .replace("<LIST", "<ul")
        .replace("</LIST>", "</ul>")
        .replace("<URL", "<a")
        .replace("</URL>", "</a>")
        // NOTE(review): this is applied to the whole string, so a literal
        // " url=" inside post text is rewritten as well.
        .replace(" url=", " href=")
        .replace("<r>", "")
        .replace("</r>", "")
}