From 860f76ce497417d7dc30347aed586bd1dea95df5 Mon Sep 17 00:00:00 2001 From: yggverse Date: Fri, 27 Sep 2024 00:07:48 +0300 Subject: [PATCH] draft link parser --- src/browser/main/tab/page/content/mod.rs | 5 +- .../main/tab/page/content/text/gemini/mod.rs | 16 ++- .../page/content/text/gemini/reader/mod.rs | 13 +- .../content/text/gemini/reader/parser/link.rs | 117 ++++++++++++++++++ .../content/text/gemini/reader/parser/mod.rs | 1 + src/browser/main/tab/page/content/text/mod.rs | 6 +- src/browser/main/tab/page/mod.rs | 2 +- 7 files changed, 147 insertions(+), 13 deletions(-) create mode 100644 src/browser/main/tab/page/content/text/gemini/reader/parser/link.rs diff --git a/src/browser/main/tab/page/content/mod.rs b/src/browser/main/tab/page/content/mod.rs index f46a76f4..625bfa7b 100644 --- a/src/browser/main/tab/page/content/mod.rs +++ b/src/browser/main/tab/page/content/mod.rs @@ -4,6 +4,7 @@ mod text; use text::Text; use gtk::{ + glib::Uri, prelude::{BoxExt, WidgetExt}, Box, Orientation, }; @@ -29,7 +30,7 @@ impl Content { } // Actions - pub fn reset(&self, mime: Mime, data: &str) { + pub fn reset(&self, mime: Mime, base: &Uri, data: &str) { // Cleanup while let Some(child) = self.widget.last_child() { self.widget.remove(&child) @@ -38,7 +39,7 @@ impl Content { // Compose match mime { Mime::TextGemini => { - self.widget.append(Text::gemini(data).widget()); + self.widget.append(Text::gemini(data, base).widget()); } Mime::TextPlain => { todo!() diff --git a/src/browser/main/tab/page/content/text/gemini/mod.rs b/src/browser/main/tab/page/content/text/gemini/mod.rs index 42d532d9..04985c2b 100644 --- a/src/browser/main/tab/page/content/text/gemini/mod.rs +++ b/src/browser/main/tab/page/content/text/gemini/mod.rs @@ -2,17 +2,21 @@ mod reader; use reader::Reader; -use gtk::Viewport; +use gtk::{ + glib::{GString, Uri}, + Viewport, +}; pub struct Gemini { + reader: Reader, widget: Viewport, } impl Gemini { // Construct - pub fn new(gemtext: &str) -> Self { + pub fn new(gemtext: &str, base: &Uri) -> Self { // Init components - let reader = Reader::new(gemtext); + let reader = Reader::new(gemtext, base); // Init widget let widget = Viewport::builder().scroll_to_focus(false).build(); @@ -20,10 +24,14 @@ impl Gemini { widget.set_child(Some(reader.widget())); // Result - Self { widget } + Self { reader, widget } } // Getters + pub fn reader_title(&self) -> &Option { + &self.reader.title() + } + pub fn widget(&self) -> &Viewport { &self.widget } diff --git a/src/browser/main/tab/page/content/text/gemini/reader/mod.rs b/src/browser/main/tab/page/content/text/gemini/reader/mod.rs index cc6f1afa..3a2ce8d8 100644 --- a/src/browser/main/tab/page/content/text/gemini/reader/mod.rs +++ b/src/browser/main/tab/page/content/text/gemini/reader/mod.rs @@ -1,10 +1,11 @@ mod parser; use parser::header::Header; +use parser::link::Link; use parser::plain::Plain; use gtk::{ - glib::GString, + glib::{GString, Uri}, prelude::{StyleContextExt, WidgetExt}, Align, CssProvider, Label, STYLE_PROVIDER_PRIORITY_APPLICATION, }; @@ -17,7 +18,7 @@ pub struct Reader { impl Reader { // Construct - pub fn new(gemtext: &str) -> Self { + pub fn new(gemtext: &str, base: &Uri) -> Self { // Init title let mut title = None; @@ -38,7 +39,13 @@ impl Reader { continue; } - // Is link @TODO + // Is link + if let Some(link) = Link::from(line, base) { + // Format + markup.push_str(link.markup()); + + continue; + } // Nothing match, escape string just markup.push_str(Plain::from(line).markup()) diff --git a/src/browser/main/tab/page/content/text/gemini/reader/parser/link.rs b/src/browser/main/tab/page/content/text/gemini/reader/parser/link.rs new file mode 100644 index 00000000..a39c84b0 --- /dev/null +++ b/src/browser/main/tab/page/content/text/gemini/reader/parser/link.rs @@ -0,0 +1,117 @@ +use gtk::glib::{ + markup_escape_text, GString, Regex, RegexCompileFlags, RegexMatchFlags, Uri, UriFlags, +}; + +pub struct Link { + alt: Option, // [optional] alternative text + date: Option, // [optional] date @TODO store in UnixTime? + external: bool, // external link indicator + link: GString, // original link, wanted for title tooltip + markup: GString, // pango markup with escaped special chars + uri: Uri, // parsed link object (currently not in use) +} + +impl Link { + // Link structure parser + // line - gemtext subject to parse + // base - Uri object, required for: + // 1. relative to absolute address conversion + // 2. external links indication + // returns new Link struct or None + pub fn from(line: &str, base: &Uri) -> Option { + // Init struct members + let alt: Option = None; + let date: Option = None; + let external: bool; + let link: GString; + let markup: GString; + let uri: Uri; + + // Parse line + let parsed = Regex::split_simple( + r"^=>\s*([^\s]+)(\s(\d{4}-\d{2}-\d{2}))?(\s(.+))?$", + line, + RegexCompileFlags::DEFAULT, + RegexMatchFlags::DEFAULT, + ); + + // Address + match parsed.get(1) { + Some(address) => { + // Define original link value (used in titles or when alt is empty) + link = GString::from(address.as_str()); + // Links in document usually relative, make them absolute to base given + match Uri::resolve_relative(Some(&base.to_str()), address.as_str(), UriFlags::NONE) + { + Ok(resolved) => { + // Make URI parsed as always valid (no idea why does lib operate strings, not objects) + match Uri::parse(&resolved, UriFlags::NONE) { + Ok(object) => { + // Set external status + external = object.host() == base.host(); + + // Set struct URI + uri = object; + } + Err(_) => return None, + } + } + Err(_) => return None, + } + } + None => return None, + } + + // Date + if let Some(date) = parsed.get(2) { + // date = date.as_str(); + } + + // Alt + if let Some(alt) = parsed.get(3) { + // alt = alt.as_str(); + } + + // Markup + markup = GString::from(format!( + "{}\n", + markup_escape_text(&uri.to_str()), // use resolved address for href + markup_escape_text(&link), // show original address for title + markup_escape_text(&link), // @TODO + )); + + Some(Self { + alt, + date, + external, + link, + markup, + uri, + }) + } + + // Getters + pub fn alt(&self) -> &Option { + &self.alt + } + + pub fn date(&self) -> &Option { + &self.date + } + + pub fn external(&self) -> &bool { + &self.external + } + + pub fn link(&self) -> &GString { + &self.link + } + + pub fn markup(&self) -> &GString { + &self.markup + } + + pub fn uri(&self) -> &Uri { + &self.uri + } +} diff --git a/src/browser/main/tab/page/content/text/gemini/reader/parser/mod.rs b/src/browser/main/tab/page/content/text/gemini/reader/parser/mod.rs index b3230e3a..9d77b73d 100644 --- a/src/browser/main/tab/page/content/text/gemini/reader/parser/mod.rs +++ b/src/browser/main/tab/page/content/text/gemini/reader/parser/mod.rs @@ -1,2 +1,3 @@ pub mod header; +pub mod link; pub mod plain; diff --git a/src/browser/main/tab/page/content/text/mod.rs b/src/browser/main/tab/page/content/text/mod.rs index 5e6e60ed..12d4ba39 100644 --- a/src/browser/main/tab/page/content/text/mod.rs +++ b/src/browser/main/tab/page/content/text/mod.rs @@ -2,7 +2,7 @@ mod gemini; use gemini::Gemini; -use gtk::ScrolledWindow; +use gtk::{glib::Uri, ScrolledWindow}; pub struct Text { widget: ScrolledWindow, @@ -10,9 +10,9 @@ pub struct Text { impl Text { // Construct - pub fn gemini(gemtext: &str) -> Self { + pub fn gemini(gemtext: &str, base: &Uri) -> Self { // Init components - let gemini = Gemini::new(gemtext); + let gemini = Gemini::new(gemtext, base); // Init widget let widget = ScrolledWindow::builder().build(); diff --git a/src/browser/main/tab/page/mod.rs b/src/browser/main/tab/page/mod.rs index 8f3865ef..8087de08 100644 --- a/src/browser/main/tab/page/mod.rs +++ b/src/browser/main/tab/page/mod.rs @@ -160,7 +160,7 @@ impl Page { meta.borrow_mut().mime = Mime::TextGemini; // Select widget match parts.get(4) { - Some(source) => content.reset(content::Mime::TextGemini, source), + Some(source) => content.reset(content::Mime::TextGemini, &uri, &source), None => todo!(), } },