remove regex dependency, rename constructor, add tests

This commit is contained in:
yggverse 2025-03-17 21:39:07 +02:00
parent 0c90bbafba
commit 22a05a975c
3 changed files with 177 additions and 147 deletions

View file

@ -34,7 +34,7 @@ for line in gemtext.lines() {
``` rust ``` rust
use ggemtext::line::code::Inline; use ggemtext::line::code::Inline;
match Inline::from("```inline```") { match Inline::parse("```inline```") {
Some(inline) => assert_eq!(inline.value, "inline"), Some(inline) => assert_eq!(inline.value, "inline"),
None => assert!(false), None => assert!(false),
} }
@ -93,31 +93,25 @@ assert_eq!("H1".to_source(&Level::H1), "# H1");
#### Link #### Link
``` rust ``` rust
use ggemtext::line::Link; use crate::line::Link;
match Link::from(
"=> gemini://geminiprotocol.net 1965-01-19 Gemini",
None, // absolute path given, base not wanted
Some(&glib::TimeZone::local()),
) {
Some(link) => {
// Alt
assert_eq!(link.alt, Some("Gemini".into()));
// Date const SOURCE: &str = "=> gemini://geminiprotocol.net 1965-01-19 Gemini";
match link.timestamp {
Some(timestamp) => {
assert_eq!(timestamp.year(), 1965);
assert_eq!(timestamp.month(), 1);
assert_eq!(timestamp.day_of_month(), 19);
}
None => assert!(false),
}
// URI let link = Link::parse(SOURCE).unwrap();
assert_eq!(link.uri.to_string(), "gemini://geminiprotocol.net");
} assert_eq!(link.alt, Some("1965-01-19 Gemini".to_string()));
None => assert!(false), assert_eq!(link.url, "gemini://geminiprotocol.net");
}
let uri = link.uri(None).unwrap();
assert_eq!(uri.scheme(), "gemini");
assert_eq!(uri.host().unwrap(), "geminiprotocol.net");
let time = link.time(Some(&glib::TimeZone::local())).unwrap();
assert_eq!(time.year(), 1965);
assert_eq!(time.month(), 1);
assert_eq!(time.day_of_month(), 19);
assert_eq!(link.to_source(), SOURCE);
``` ```
#### List #### List

View file

@ -1,104 +1,119 @@
use glib::{DateTime, Regex, RegexCompileFlags, RegexMatchFlags, TimeZone, Uri, UriFlags}; use glib::{DateTime, TimeZone, Uri, UriFlags};
const S: char = ' ';
pub const TAG: &str = "=>"; pub const TAG: &str = "=>";
/// [Link](https://geminiprotocol.net/docs/gemtext-specification.gmi#link-lines) entity holder /// [Link](https://geminiprotocol.net/docs/gemtext-specification.gmi#link-lines) entity holder
pub struct Link { pub struct Link {
pub alt: Option<String>, // [optional] alternative link description /// For performance reasons, hold Gemtext date and alternative together as the optional String
pub timestamp: Option<DateTime>, // [optional] valid link DateTime object /// * to extract valid [DateTime](https://docs.gtk.org/glib/struct.DateTime.html) use `time` implementation method
pub uri: Uri, // [required] valid link URI object pub alt: Option<String>,
/// For performance reasons, hold URL as the raw String
/// * to extract valid [Uri](https://docs.gtk.org/glib/struct.Uri.html) use `uri` implementation method
pub url: String,
} }
impl Link { impl Link {
// Constructors // Constructors
/// Parse `Self` from line string /// Parse `Self` from line string
pub fn from(line: &str, base: Option<&Uri>, timezone: Option<&TimeZone>) -> Option<Self> { pub fn parse(line: &str) -> Option<Self> {
// Skip next operations on prefix mismatch let l = line.strip_prefix(TAG)?.trim();
// * replace regex implementation @TODO let u = l.find(S).map_or(l, |i| &l[..i]);
if !line.starts_with(TAG) { if u.is_empty() {
return None; return None;
} }
Some(Self {
alt: l
.get(u.len()..)
.map(|a| a.trim())
.filter(|a| !a.is_empty())
.map(|a| a.to_string()),
url: u.to_string(),
})
}
// Define initial values // Converters
let mut alt = None;
let mut timestamp = None;
// Begin line parse /// Convert `Self` to [Gemtext](https://geminiprotocol.net/docs/gemtext-specification.gmi) line
let regex = Regex::split_simple( pub fn to_source(&self) -> String {
r"^=>\s*([^\s]+)\s*(\d{4}-\d{2}-\d{2})?\s*(.+)?$", let mut s = String::with_capacity(
line, TAG.len() + self.url.len() + self.alt.as_ref().map_or(0, |a| a.len()) + 2,
RegexCompileFlags::DEFAULT,
RegexMatchFlags::DEFAULT,
); );
s.push_str(TAG);
s.push(S);
s.push_str(&self.url);
if let Some(ref alt) = self.alt {
s.push(S);
s.push_str(alt);
}
s
}
// Detect address required to continue // Getters
let mut unresolved_address = regex.get(1)?.to_string();
/// Get valid [DateTime](https://docs.gtk.org/glib/struct.DateTime.html) for `Self`
pub fn time(&self, timezone: Option<&TimeZone>) -> Option<DateTime> {
let a = self.alt.as_ref()?;
let t = &a[..a.find(S).unwrap_or(a.len())];
DateTime::from_iso8601(&format!("{t}T00:00:00"), timezone).ok()
}
/// Get valid [Uri](https://docs.gtk.org/glib/struct.Uri.html) for `Self`
pub fn uri(&self, base: Option<&Uri>) -> Option<Uri> {
// Relative scheme patch // Relative scheme patch
// https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 // https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
if let Some(p) = unresolved_address.strip_prefix("//") { let unresolved_address = match self.url.strip_prefix("//") {
Some(p) => {
let b = base?; let b = base?;
let postfix = p.trim_start_matches(":"); let s = p.trim_start_matches(":");
unresolved_address = format!( &format!(
"{}://{}", "{}://{}",
b.scheme(), b.scheme(),
if postfix.is_empty() { if s.is_empty() {
format!("{}/", b.host()?) format!("{}/", b.host()?)
} else { } else {
postfix.into() s.into()
} }
) )
} }
// Convert address to the valid URI None => &self.url,
let uri = match base { };
// Base conversion requested // Convert address to the valid URI,
Some(base_uri) => { // resolve to absolute URL format if the target is relative
// Convert relative address to absolute match base {
match Uri::resolve_relative( Some(base_uri) => match Uri::resolve_relative(
Some(&base_uri.to_str()), Some(&base_uri.to_str()),
unresolved_address.as_str(), unresolved_address,
UriFlags::NONE, UriFlags::NONE,
) { ) {
Ok(resolved_str) => { Ok(resolved_str) => Uri::parse(&resolved_str, UriFlags::NONE).ok(),
// Try convert string to the valid URI
match Uri::parse(&resolved_str, UriFlags::NONE) {
Ok(resolved_uri) => resolved_uri,
Err(_) => return None,
}
}
Err(_) => return None,
}
}
// Base resolve not requested
None => {
// Try convert address to valid URI
match Uri::parse(&unresolved_address, UriFlags::NONE) {
Ok(unresolved_uri) => unresolved_uri,
Err(_) => return None,
}
}
};
// Timestamp
if let Some(date) = regex.get(2) {
timestamp = match DateTime::from_iso8601(&format!("{date}T00:00:00"), timezone) {
Ok(value) => Some(value),
Err(_) => None, Err(_) => None,
},
None => Uri::parse(unresolved_address, UriFlags::NONE).ok(),
} }
} }
}
// Alt
if let Some(value) = regex.get(3) { #[test]
if !value.is_empty() { fn test() {
alt = Some(value.to_string()) use crate::line::Link;
}
}; const SOURCE: &str = "=> gemini://geminiprotocol.net 1965-01-19 Gemini";
Some(Self { let link = Link::parse(SOURCE).unwrap();
alt,
timestamp, assert_eq!(link.alt, Some("1965-01-19 Gemini".to_string()));
uri, assert_eq!(link.url, "gemini://geminiprotocol.net");
})
} let uri = link.uri(None).unwrap();
assert_eq!(uri.scheme(), "gemini");
assert_eq!(uri.host().unwrap(), "geminiprotocol.net");
let time = link.time(Some(&glib::TimeZone::local())).unwrap();
assert_eq!(time.year(), 1965);
assert_eq!(time.month(), 1);
assert_eq!(time.day_of_month(), 19);
assert_eq!(link.to_source(), SOURCE);
} }

View file

@ -25,13 +25,10 @@ fn gemtext() {
let mut code_multiline_buffer: Option<Multiline> = None; let mut code_multiline_buffer: Option<Multiline> = None;
// Define base URI as integration.gmi contains one relative link // Define base URI as integration.gmi contains one relative link
let base = match Uri::parse("gemini://geminiprotocol.net", UriFlags::NONE) { let base = Uri::parse("gemini://geminiprotocol.net", UriFlags::NONE).unwrap();
Ok(uri) => Some(uri),
Err(_) => None,
};
// Define timezone as integration.gmi contains one link with a date // Define timezone as integration.gmi contains one link with a date
let timezone = Some(TimeZone::local()); let timezone = TimeZone::local();
// Parse document by line // Parse document by line
for line in gemtext.lines() { for line in gemtext.lines() {
@ -66,7 +63,7 @@ fn gemtext() {
} }
// Link // Link
if let Some(result) = Link::from(line, base.as_ref(), timezone.as_ref()) { if let Some(result) = Link::parse(line) {
links.push(result); links.push(result);
continue; continue;
} }
@ -150,52 +147,64 @@ fn gemtext() {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(item.alt, None);
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(
item.uri(Some(&base)).unwrap().to_str(),
"gemini://geminiprotocol.net"
);
} // #1 } // #1
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net"
);
let timestamp = item.timestamp.clone().unwrap(); let time = item.time(Some(&timezone)).unwrap();
assert_eq!(timestamp.year(), 1965); assert_eq!(time.year(), 1965);
assert_eq!(timestamp.month(), 1); assert_eq!(time.month(), 1);
assert_eq!(timestamp.day_of_month(), 19); assert_eq!(time.day_of_month(), 19);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(item.alt, Some("1965-01-19".to_string()));
} // #2 } // #2
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt.clone().unwrap(), "Gemini"); assert_eq!(item.alt.clone().unwrap(), "Gemini");
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net"
);
} // #3 } // #3
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt.clone().unwrap(), "Gemini"); assert_eq!(item.alt, Some("1965-01-19 Gemini".to_string()));
let timestamp = item.timestamp.clone().unwrap(); let time = item.time(Some(&timezone)).unwrap();
assert_eq!(timestamp.year(), 1965); assert_eq!(time.year(), 1965);
assert_eq!(timestamp.month(), 1); assert_eq!(time.month(), 1);
assert_eq!(timestamp.day_of_month(), 19); assert_eq!(time.day_of_month(), 19);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net"
);
} // #4 } // #4
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt.clone().unwrap(), "Gemini"); assert_eq!(item.alt, Some("1965-01-19 Gemini".to_string()));
let timestamp = item.timestamp.clone().unwrap(); let time = item.time(Some(&timezone)).unwrap();
assert_eq!(timestamp.year(), 1965); assert_eq!(time.year(), 1965);
assert_eq!(timestamp.month(), 1); assert_eq!(time.month(), 1);
assert_eq!(timestamp.day_of_month(), 19); assert_eq!(time.day_of_month(), 19);
assert_eq!( assert_eq!(
item.uri.to_str(), item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net/docs/gemtext.gmi" "gemini://geminiprotocol.net/docs/gemtext.gmi"
); );
} // #5 } // #5
@ -203,29 +212,41 @@ fn gemtext() {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(item.alt, None);
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net"
);
} // #6 } // #6
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(item.alt, None);
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net"
);
} // #7 } // #7
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(item.alt, None);
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net/path"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net/path"
);
} // #8 } // #8
{ {
let item = link.next().unwrap(); let item = link.next().unwrap();
assert_eq!(item.alt, None); assert_eq!(item.alt, None);
assert_eq!(item.timestamp, None); assert_eq!(item.time(Some(&timezone)), None);
assert_eq!(item.uri.to_str(), "gemini://geminiprotocol.net/"); assert_eq!(
item.uri(Some(&base)).unwrap().to_string(),
"gemini://geminiprotocol.net/"
);
} // #9 } // #9
// Validate lists // Validate lists