From 22a05a975c95bd8ce1545fc5bccc10dae4120ce3 Mon Sep 17 00:00:00 2001 From: yggverse Date: Mon, 17 Mar 2025 21:39:07 +0200 Subject: [PATCH] remove regex dependency, rename constructor, add tests --- README.md | 42 +++++----- src/line/link.rs | 189 +++++++++++++++++++++++-------------------- tests/integration.rs | 93 ++++++++++++--------- 3 files changed, 177 insertions(+), 147 deletions(-) diff --git a/README.md b/README.md index cc19618..8bbfde8 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ for line in gemtext.lines() { ``` rust use ggemtext::line::code::Inline; -match Inline::from("```inline```") { +match Inline::parse("```inline```") { Some(inline) => assert_eq!(inline.value, "inline"), None => assert!(false), } @@ -93,31 +93,25 @@ assert_eq!("H1".to_source(&Level::H1), "# H1"); #### Link ``` rust -use ggemtext::line::Link; -match Link::from( - "=> gemini://geminiprotocol.net 1965-01-19 Gemini", - None, // absolute path given, base not wanted - Some(&glib::TimeZone::local()), -) { - Some(link) => { - // Alt - assert_eq!(link.alt, Some("Gemini".into())); +use crate::line::Link; - // Date - match link.timestamp { - Some(timestamp) => { - assert_eq!(timestamp.year(), 1965); - assert_eq!(timestamp.month(), 1); - assert_eq!(timestamp.day_of_month(), 19); - } - None => assert!(false), - } +const SOURCE: &str = "=> gemini://geminiprotocol.net 1965-01-19 Gemini"; - // URI - assert_eq!(link.uri.to_string(), "gemini://geminiprotocol.net"); - } - None => assert!(false), -} +let link = Link::parse(SOURCE).unwrap(); + +assert_eq!(link.alt, Some("1965-01-19 Gemini".to_string())); +assert_eq!(link.url, "gemini://geminiprotocol.net"); + +let uri = link.uri(None).unwrap(); +assert_eq!(uri.scheme(), "gemini"); +assert_eq!(uri.host().unwrap(), "geminiprotocol.net"); + +let time = link.time(Some(&glib::TimeZone::local())).unwrap(); +assert_eq!(time.year(), 1965); +assert_eq!(time.month(), 1); +assert_eq!(time.day_of_month(), 19); + +assert_eq!(link.to_source(), SOURCE); ``` #### List diff --git a/src/line/link.rs b/src/line/link.rs index 4b7d4bc..d95cd47 100644 --- a/src/line/link.rs +++ b/src/line/link.rs @@ -1,104 +1,119 @@ -use glib::{DateTime, Regex, RegexCompileFlags, RegexMatchFlags, TimeZone, Uri, UriFlags}; +use glib::{DateTime, TimeZone, Uri, UriFlags}; +const S: char = ' '; pub const TAG: &str = "=>"; /// [Link](https://geminiprotocol.net/docs/gemtext-specification.gmi#link-lines) entity holder pub struct Link { - pub alt: Option, // [optional] alternative link description - pub timestamp: Option, // [optional] valid link DateTime object - pub uri: Uri, // [required] valid link URI object + /// For performance reasons, hold Gemtext date and alternative together as the optional String + /// * to extract valid [DateTime](https://docs.gtk.org/glib/struct.DateTime.html) use `time` implementation method + pub alt: Option, + /// For performance reasons, hold URL as the raw String + /// * to extract valid [Uri](https://docs.gtk.org/glib/struct.Uri.html) use `uri` implementation method + pub url: String, } impl Link { // Constructors /// Parse `Self` from line string - pub fn from(line: &str, base: Option<&Uri>, timezone: Option<&TimeZone>) -> Option { - // Skip next operations on prefix mismatch - // * replace regex implementation @TODO - if !line.starts_with(TAG) { + pub fn parse(line: &str) -> Option { + let l = line.strip_prefix(TAG)?.trim(); + let u = l.find(S).map_or(l, |i| &l[..i]); + if u.is_empty() { return None; } - - // Define initial values - let mut alt = None; - let mut timestamp = None; - - // Begin line parse - let regex = Regex::split_simple( - r"^=>\s*([^\s]+)\s*(\d{4}-\d{2}-\d{2})?\s*(.+)?$", - line, - RegexCompileFlags::DEFAULT, - RegexMatchFlags::DEFAULT, - ); - - // Detect address required to continue - let mut unresolved_address = regex.get(1)?.to_string(); - - // Relative scheme patch - // https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 - if let Some(p) = unresolved_address.strip_prefix("//") { - let b = base?; - let postfix = p.trim_start_matches(":"); - unresolved_address = format!( - "{}://{}", - b.scheme(), - if postfix.is_empty() { - format!("{}/", b.host()?) - } else { - postfix.into() - } - ) - } - // Convert address to the valid URI - let uri = match base { - // Base conversion requested - Some(base_uri) => { - // Convert relative address to absolute - match Uri::resolve_relative( - Some(&base_uri.to_str()), - unresolved_address.as_str(), - UriFlags::NONE, - ) { - Ok(resolved_str) => { - // Try convert string to the valid URI - match Uri::parse(&resolved_str, UriFlags::NONE) { - Ok(resolved_uri) => resolved_uri, - Err(_) => return None, - } - } - Err(_) => return None, - } - } - // Base resolve not requested - None => { - // Try convert address to valid URI - match Uri::parse(&unresolved_address, UriFlags::NONE) { - Ok(unresolved_uri) => unresolved_uri, - Err(_) => return None, - } - } - }; - - // Timestamp - if let Some(date) = regex.get(2) { - timestamp = match DateTime::from_iso8601(&format!("{date}T00:00:00"), timezone) { - Ok(value) => Some(value), - Err(_) => None, - } - } - - // Alt - if let Some(value) = regex.get(3) { - if !value.is_empty() { - alt = Some(value.to_string()) - } - }; - Some(Self { - alt, - timestamp, - uri, + alt: l + .get(u.len()..) + .map(|a| a.trim()) + .filter(|a| !a.is_empty()) + .map(|a| a.to_string()), + url: u.to_string(), }) } + + // Converters + + /// Convert `Self` to [Gemtext](https://geminiprotocol.net/docs/gemtext-specification.gmi) line + pub fn to_source(&self) -> String { + let mut s = String::with_capacity( + TAG.len() + self.url.len() + self.alt.as_ref().map_or(0, |a| a.len()) + 2, + ); + s.push_str(TAG); + s.push(S); + s.push_str(&self.url); + if let Some(ref alt) = self.alt { + s.push(S); + s.push_str(alt); + } + s + } + + // Getters + + /// Get valid [DateTime](https://docs.gtk.org/glib/struct.DateTime.html) for `Self` + pub fn time(&self, timezone: Option<&TimeZone>) -> Option { + let a = self.alt.as_ref()?; + let t = &a[..a.find(S).unwrap_or(a.len())]; + DateTime::from_iso8601(&format!("{t}T00:00:00"), timezone).ok() + } + + /// Get valid [Uri](https://docs.gtk.org/glib/struct.Uri.html) for `Self` + pub fn uri(&self, base: Option<&Uri>) -> Option { + // Relative scheme patch + // https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 + let unresolved_address = match self.url.strip_prefix("//") { + Some(p) => { + let b = base?; + let s = p.trim_start_matches(":"); + &format!( + "{}://{}", + b.scheme(), + if s.is_empty() { + format!("{}/", b.host()?) + } else { + s.into() + } + ) + } + None => &self.url, + }; + // Convert address to the valid URI, + // resolve to absolute URL format if the target is relative + match base { + Some(base_uri) => match Uri::resolve_relative( + Some(&base_uri.to_str()), + unresolved_address, + UriFlags::NONE, + ) { + Ok(resolved_str) => Uri::parse(&resolved_str, UriFlags::NONE).ok(), + Err(_) => None, + }, + None => Uri::parse(unresolved_address, UriFlags::NONE).ok(), + } + } +} + +#[test] +fn test() { + use crate::line::Link; + + const SOURCE: &str = "=> gemini://geminiprotocol.net 1965-01-19 Gemini"; + + let link = Link::parse(SOURCE).unwrap(); + + assert_eq!(link.alt, Some("1965-01-19 Gemini".to_string())); + assert_eq!(link.url, "gemini://geminiprotocol.net"); + + let uri = link.uri(None).unwrap(); + assert_eq!(uri.scheme(), "gemini"); + assert_eq!(uri.host().unwrap(), "geminiprotocol.net"); + + let time = link.time(Some(&glib::TimeZone::local())).unwrap(); + assert_eq!(time.year(), 1965); + assert_eq!(time.month(), 1); + assert_eq!(time.day_of_month(), 19); + + assert_eq!(link.to_source(), SOURCE); } diff --git a/tests/integration.rs b/tests/integration.rs index c9168fe..faf24ff 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -25,13 +25,10 @@ fn gemtext() { let mut code_multiline_buffer: Option = None; // Define base URI as integration.gmi contain one relative link - let base = match Uri::parse("gemini://geminiprotocol.net", UriFlags::NONE) { - Ok(uri) => Some(uri), - Err(_) => None, - }; + let base = Uri::parse("gemini://geminiprotocol.net", UriFlags::NONE).unwrap(); // Define timezone as integration.gmi contain one links with date - let timezone = Some(TimeZone::local()); + let timezone = TimeZone::local(); // Parse document by line for line in gemtext.lines() { @@ -66,7 +63,7 @@ fn gemtext() { } // Link - if let Some(result) = Link::from(line, base.as_ref(), timezone.as_ref()) { + if let Some(result) = Link::parse(line) { links.push(result); continue; } @@ -150,52 +147,64 @@ fn gemtext() { let item = link.next().unwrap(); assert_eq!(item.alt, None); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_str(), + "gemini://geminiprotocol.net" + ); } // #1 { let item = link.next().unwrap(); - assert_eq!(item.alt, None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net" + ); - let timestamp = item.timestamp.clone().unwrap(); - assert_eq!(timestamp.year(), 1965); - assert_eq!(timestamp.month(), 1); - assert_eq!(timestamp.day_of_month(), 19); + let time = item.time(Some(&timezone)).unwrap(); + assert_eq!(time.year(), 1965); + assert_eq!(time.month(), 1); + assert_eq!(time.day_of_month(), 19); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!(item.alt, Some("1965-01-19".to_string())); } // #2 { let item = link.next().unwrap(); assert_eq!(item.alt.clone().unwrap(), "Gemini"); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net" + ); } // #3 { let item = link.next().unwrap(); - assert_eq!(item.alt.clone().unwrap(), "Gemini"); + assert_eq!(item.alt, Some("1965-01-19 Gemini".to_string())); - let timestamp = item.timestamp.clone().unwrap(); - assert_eq!(timestamp.year(), 1965); - assert_eq!(timestamp.month(), 1); - assert_eq!(timestamp.day_of_month(), 19); + let time = item.time(Some(&timezone)).unwrap(); + assert_eq!(time.year(), 1965); + assert_eq!(time.month(), 1); + assert_eq!(time.day_of_month(), 19); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net" + ); } // #4 { let item = link.next().unwrap(); - assert_eq!(item.alt.clone().unwrap(), "Gemini"); + assert_eq!(item.alt, Some("1965-01-19 Gemini".to_string())); - let timestamp = item.timestamp.clone().unwrap(); - assert_eq!(timestamp.year(), 1965); - assert_eq!(timestamp.month(), 1); - assert_eq!(timestamp.day_of_month(), 19); + let time = item.time(Some(&timezone)).unwrap(); + assert_eq!(time.year(), 1965); + assert_eq!(time.month(), 1); + assert_eq!(time.day_of_month(), 19); assert_eq!( - item.uri.to_str(), + item.uri(Some(&base)).unwrap().to_string(), "gemini://geminiprotocol.net/docs/gemtext.gmi" ); } // #5 @@ -203,29 +212,41 @@ fn gemtext() { let item = link.next().unwrap(); assert_eq!(item.alt, None); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net" + ); } // #6 { let item = link.next().unwrap(); assert_eq!(item.alt, None); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net" + ); } // #7 { let item = link.next().unwrap(); assert_eq!(item.alt, None); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net/path"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net/path" + ); } // #8 { let item = link.next().unwrap(); assert_eq!(item.alt, None); - assert_eq!(item.timestamp, None); - assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net/"); + assert_eq!(item.time(Some(&timezone)), None); + assert_eq!( + item.uri(Some(&base)).unwrap().to_string(), + "gemini://geminiprotocol.net/" + ); } // #9 // Validate lists