From 266b8bfa95f500fa0dfcd3782719f893c697b8ae Mon Sep 17 00:00:00 2001 From: yggverse Date: Sun, 8 Mar 2026 06:48:24 +0200 Subject: [PATCH] draft links parser --- Cargo.lock | 33 ++++ Cargo.toml | 1 + .../tab/item/page/content/text/markdown.rs | 156 ++++++++++++------ 3 files changed, 141 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d07af96f..ba2dd8fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,7 @@ dependencies = [ "plurify", "r2d2", "r2d2_sqlite", + "regex", "rusqlite", "sourceview5", "syntect", @@ -31,6 +32,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "ansi-parser" version = "0.9.1" @@ -1131,6 +1141,29 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" version = "0.8.10" diff --git a/Cargo.toml b/Cargo.toml index a419e8d3..4a529faf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ openssl = "0.10.72" plurify = "0.2.0" r2d2 = "0.8.10" r2d2_sqlite = "0.32.0" +regex = "1.12.3" syntect = "5.2.0" # development diff --git a/src/app/browser/window/tab/item/page/content/text/markdown.rs 
b/src/app/browser/window/tab/item/page/content/text/markdown.rs index 0435dd64..1e051762 100644 --- a/src/app/browser/window/tab/item/page/content/text/markdown.rs +++ b/src/app/browser/window/tab/item/page/content/text/markdown.rs @@ -13,11 +13,12 @@ use gtk::{ UriLauncher, Window, WrapMode, gdk::{BUTTON_MIDDLE, BUTTON_PRIMARY, BUTTON_SECONDARY, RGBA}, gio::{Cancellable, SimpleAction, SimpleActionGroup}, - glib::{Uri, uuid_string_random}, + glib::{Uri, UriFlags, uuid_string_random}, prelude::{PopoverExt, TextBufferExt, TextBufferExtManual, TextTagExt, TextViewExt, WidgetExt}, }; use gutter::Gutter; use icon::Icon; +use regex::Regex; use sourceview::prelude::{ActionExt, ActionMapExt, DisplayExt, ToVariant}; use std::{cell::Cell, collections::HashMap, rc::Rc}; use syntax::Syntax; @@ -39,20 +40,6 @@ impl Markdown { base: &Uri, markdown: &str, ) -> Result { - /// Header tag - fn header(buffer: &TextBuffer, tag: &TextTag, line: &str, pattern: &str) -> Option { - if let Some(h) = line.trim_start().strip_prefix(pattern) - && !h.starts_with(pattern) - { - let header = h.trim(); - buffer.insert_with_tags(&mut buffer.end_iter(), header, &[tag]); - buffer.insert(&mut buffer.end_iter(), NEW_LINE); - Some(header.into()) - } else { - None - } - } - // Init default values let mut title = None; @@ -120,7 +107,7 @@ impl Markdown { t == 0 || t.is_multiple_of(2) }; - // Parse markdown lines + // Parse single-line markdown tags 'l: for line in markdown.lines() { if is_code_enabled { use ggemtext::line::Code; @@ -230,39 +217,6 @@ impl Markdown { } } - // Is link - if let Some(link) = ggemtext::line::Link::parse(line) { - if let Some(uri) = link.uri(Some(base)) { - let mut alt = Vec::new(); - - if uri.scheme() != base.scheme() { - alt.push("⇖".to_string()); - } - - alt.push(match link.alt { - Some(alt) => alt, - None => uri.to_string(), - }); - - let a = TextTag::builder() - .foreground_rgba(&link_color.0) - // .foreground_rgba(&adw::StyleManager::default().accent_color_rgba()) @TODO 
adw 1.6 / ubuntu 24.10+
-                        .sentence(true)
-                        .wrap_mode(WrapMode::Word)
-                        .build();
-
-                    if !tag.text_tag_table.add(&a) {
-                        panic!()
-                    }
-
-                    buffer.insert_with_tags(&mut buffer.end_iter(), &alt.join(" "), &[&a]);
-                    buffer.insert(&mut buffer.end_iter(), NEW_LINE);
-
-                    links.insert(a, uri);
-                }
-                continue;
-            }
-
             // Is list
             if let Some(value) = ggemtext::line::list::Gemtext::as_value(line)
             {
@@ -300,6 +254,10 @@ impl Markdown {
             buffer.insert(&mut buffer.end_iter(), NEW_LINE);
         }
 
+        // Parse in-line markdown tags
+
+        link(&buffer, &tag, base, &link_color.0, &mut links);
+
         // Context menu
         let action_link_tab =
             SimpleAction::new_stateful(&uuid_string_random(), None, &String::new().to_variant());
@@ -596,6 +554,127 @@ fn link_prefix(request: String, prefix: &str) -> String {
     format!("{prefix}{}", request.trim_start_matches(prefix))
 }
 
+/// Render in-line markdown links
+///
+/// Replaces every `[text](url)` occurrence in `buffer` (an optional leading `!`
+/// belongs to the markup) with its label, tagged as a link.
+///
+/// * `buffer` - buffer already filled by the single-line pass
+/// * `tag` - holder of the tag table every new link tag is added to
+/// * `base` - page address, relative targets are resolved against it
+/// * `link_color` - foreground color of the link label
+/// * `links` - receives one `tag -> resolved address` entry per rendered link
+///
+/// Matches are replaced in place, the rest of the buffer is not rewritten.
+/// A target that does not resolve to a valid URI is left as written.
+///
+/// # Panics
+///
+/// Panics if the tag table rejects a new link tag.
+fn link(
+    buffer: &TextBuffer,
+    tag: &Tag,
+    base: &Uri,
+    link_color: &RGBA,
+    links: &mut HashMap<TextTag, Uri>,
+) {
+    // The pattern is constant, compile it once
+    static LINK: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
+        Regex::new(r"!?\[(?P<text>[^\]]+)\]\((?P<url>[^\)]+)\)")
+            .expect("link pattern is a valid expression")
+    });
+
+    // Unlike `text`, `slice` keeps a placeholder for each embedded object,
+    // so character positions in `content` match the buffer offsets
+    let content = buffer
+        .slice(&buffer.start_iter(), &buffer.end_iter(), true)
+        .to_string();
+    let base_url = base.to_string();
+
+    // Collect the replacements first:
+    // regex positions are bytes, buffer offsets are characters
+    let mut replacements = Vec::new();
+    let (mut byte_pos, mut char_pos) = (0, 0);
+    for cap in LINK.captures_iter(&content) {
+        let full_match = cap.get(0).expect("group 0 is the whole match");
+        char_pos += content[byte_pos..full_match.start()].chars().count();
+        let start = char_pos;
+        char_pos += full_match.as_str().chars().count();
+        byte_pos = full_match.end();
+        // Relative scheme patch
+        // https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
+        let patched;
+        let unresolved_url = match cap["url"].strip_prefix("//") {
+            Some(p) => {
+                let s = p.trim_start_matches(':');
+                patched = format!(
+                    "{}://{}",
+                    base.scheme(),
+                    if s.is_empty() {
+                        format!("{}/", base.host().unwrap_or_default())
+                    } else {
+                        s.into()
+                    }
+                );
+                patched.as_str()
+            }
+            None => &cap["url"],
+        };
+        // Convert address to the valid URI,
+        // resolve to absolute URL format if the target is relative
+        let Ok(uri) = Uri::resolve_relative(Some(&base_url), unresolved_url, UriFlags::NONE)
+            .and_then(|url| Uri::parse(&url, UriFlags::NONE))
+        else {
+            // Page content must not crash the parser: keep the markup as plain text
+            continue;
+        };
+        let (Ok(start), Ok(end)) = (i32::try_from(start), i32::try_from(char_pos)) else {
+            continue;
+        };
+        let alt = if uri.scheme() == base.scheme() {
+            cap["text"].to_string()
+        } else {
+            format!("⇖ {}", &cap["text"])
+        };
+        replacements.push((start, end, alt, uri));
+    }
+
+    // Apply from the end of the buffer, so the pending offsets stay valid
+    for (start, end, alt, uri) in replacements.into_iter().rev() {
+        let a = TextTag::builder()
+            .foreground_rgba(link_color)
+            // .foreground_rgba(&adw::StyleManager::default().accent_color_rgba())
+            // @TODO adw 1.6 / ubuntu 24.10+
+            .sentence(true)
+            .wrap_mode(WrapMode::Word)
+            .build();
+
+        if !tag.text_tag_table.add(&a) {
+            panic!()
+        }
+
+        // `delete` re-initializes both iterators to the place of the removed text
+        let mut iter = buffer.iter_at_offset(start);
+        buffer.delete(&mut iter, &mut buffer.iter_at_offset(end));
+        buffer.insert_with_tags(&mut iter, &alt, &[&a]);
+        links.insert(a, uri);
+    }
+}
+
+/// Header tag
+fn header(buffer: &TextBuffer, tag: &TextTag, line: &str, pattern: &str) -> Option {
+    if let Some(h) = line.trim_start().strip_prefix(pattern)
+        && !h.starts_with(pattern)
+    {
+        let header = h.trim();
+        buffer.insert_with_tags(&mut buffer.end_iter(), header, &[tag]);
+        buffer.insert(&mut buffer.end_iter(), NEW_LINE);
+        Some(header.into())
+    } else {
+        None
+    }
+}
+
 const LINK_PREFIX_DOWNLOAD: &str = "download:";
 const LINK_PREFIX_SOURCE: &str = "source:";