cache parsing, new syntax

Changed the parsing: lines are now parsed once when the config file is read
and the parse result is cached, rather than checking the line format every
time the file is served.
This commit is contained in:
Johann150 2021-02-04 17:00:33 +01:00
parent aa713a2dea
commit 548e0f401f
No known key found for this signature in database
GPG key ID: 9EE6577A2A06F8F1
2 changed files with 98 additions and 27 deletions

View file

@ -1,5 +1,5 @@
mod metadata; mod metadata;
use metadata::FileOptions; use metadata::{FileOptions, PresetMeta};
use { use {
once_cell::sync::Lazy, once_cell::sync::Lazy,
@ -27,12 +27,11 @@ fn main() -> Result {
env_logger::Builder::new().parse_filters("info").init(); env_logger::Builder::new().parse_filters("info").init();
} }
Runtime::new()?.block_on(async { Runtime::new()?.block_on(async {
let mimetypes = Arc::new(RwLock::new(FileOptions::new( let mimetypes = Arc::new(RwLock::new(FileOptions::new(PresetMeta::Parameters(
&ARGS ARGS.language
.language
.as_ref() .as_ref()
.map_or(String::new(), |lang| format!(";lang={}", lang)), .map_or(String::new(), |lang| format!(";lang={}", lang)),
))); ))));
let listener = TcpListener::bind(&ARGS.addrs[..]).await?; let listener = TcpListener::bind(&ARGS.addrs[..]).await?;
log::info!("Listening on {:?}...", ARGS.addrs); log::info!("Listening on {:?}...", ARGS.addrs);
loop { loop {
@ -317,7 +316,15 @@ impl RequestHandle {
} }
} }
// Make sure the file opens successfully before sending the success header. let data = self.metadata.write().await.get(&path);
if let PresetMeta::FullHeader(status, meta) = data {
self.send_header(status, &meta).await?;
// do not try to access the file
return Ok(());
}
// Make sure the file opens successfully before sending a success header.
let mut file = match tokio::fs::File::open(&path).await { let mut file = match tokio::fs::File::open(&path).await {
Ok(file) => file, Ok(file) => file,
Err(e) => { Err(e) => {
@ -327,21 +334,21 @@ impl RequestHandle {
}; };
// Send header. // Send header.
let mut locked = self.metadata.write().await; let mime = match data {
let data = locked.get(&path); // this was already handled before opening the file
let mime = if data.is_empty() || data.starts_with(';') { PresetMeta::FullHeader(..) => unreachable!(),
// guess MIME type // treat this as the full MIME type
if path.extension() == Some(OsStr::new("gmi")) { PresetMeta::FullMime(mime) => mime.clone(),
format!("text/gemini{}", data) // guess the MIME type and add the parameters
} else { PresetMeta::Parameters(params) => {
let mime = mime_guess::from_path(&path).first_or_octet_stream(); if path.extension() == Some(OsStr::new("gmi")) {
format!("{}{}", mime.essence_str(), data) format!("text/gemini{}", params)
} else {
let mime = mime_guess::from_path(&path).first_or_octet_stream();
format!("{}{}", mime.essence_str(), params)
}
} }
} else {
// this must be a full MIME type
data.to_owned()
}; };
drop(locked);
self.send_header(20, &mime).await?; self.send_header(20, &mime).await?;
// Send body. // Send body.

View file

@ -3,6 +3,8 @@ use std::io::{BufRead, BufReader};
use std::path::PathBuf; use std::path::PathBuf;
use std::time::SystemTime; use std::time::SystemTime;
static SIDECAR_FILENAME: &str = ".meta";
/// A struct to store a string of metadata for each file retrieved from /// A struct to store a string of metadata for each file retrieved from
/// sidecar files named by `SIDECAR_FILENAME`. /// sidecar files named by `SIDECAR_FILENAME`.
/// ///
@ -21,19 +23,47 @@ pub(crate) struct FileOptions {
/// has changed. /// has changed.
databases_read: BTreeMap<PathBuf, SystemTime>, databases_read: BTreeMap<PathBuf, SystemTime>,
/// Stores the metadata for each file /// Stores the metadata for each file
file_meta: BTreeMap<PathBuf, String>, file_meta: BTreeMap<PathBuf, PresetMeta>,
/// The default value to return /// The default value to return
default: String, default: PresetMeta,
} }
static SIDECAR_FILENAME: &str = ".mime"; /// An enum to store the different alternatives that a line in the sidecar
/// file can have.
#[derive(Clone, Debug)]
pub(crate) enum PresetMeta {
/// A line that starts with a semicolon in the sidecar file, or an
/// empty line (to overwrite the default language command line flag).
/// ```text
/// index.gmi: ;lang=en-GB
/// ```
/// The content is interpreted as MIME parameters and is appended to the
/// MIME type that agate guesses for the respective file, if that file can
/// be found.
Parameters(String),
/// A line that is neither a `Parameters` line nor a `FullHeader` line.
/// ```text
/// strange.file: text/plain; lang=ee
/// ```
/// Agate will send the complete line as the MIME type of the response,
/// together with a `20` status code, if the respective file can be found.
FullMime(String),
/// A line that starts with a digit between 1 and 6 inclusive followed by
/// another digit and a space (U+0020). In the categories defined by the
/// Gemini specification you can pick a defined or non-defined status code.
/// ```text
/// gone.gmi: 52 This file is no longer available.
/// ```
/// The first field holds the two-digit status code and the second field
/// holds the rest of the line after the separator.
///
/// Agate will send this header line, CR, LF, and nothing else. Agate will
/// not try to access the requested file.
FullHeader(u8, String),
}
impl FileOptions { impl FileOptions {
pub(crate) fn new(default: &str) -> Self { pub(crate) fn new(default: PresetMeta) -> Self {
Self { Self {
databases_read: BTreeMap::new(), databases_read: BTreeMap::new(),
file_meta: BTreeMap::new(), file_meta: BTreeMap::new(),
default: default.to_string(), default,
} }
} }
@ -93,7 +123,41 @@ impl FileOptions {
// generate workspace-unique path // generate workspace-unique path
let mut path = db_dir.clone(); let mut path = db_dir.clone();
path.push(parts[0].trim()); path.push(parts[0].trim());
self.file_meta.insert(path, parts[1].trim().to_string()); // parse the line
let header = parts[1].trim();
let preset = if header.is_empty() || header.starts_with(';') {
PresetMeta::Parameters(header.to_string())
} else if matches!(header.chars().next(), Some('1'..='6')) {
if header.len() < 3
|| !header.chars().nth(1).unwrap().is_ascii_digit()
|| !header.chars().nth(2).unwrap().is_whitespace()
{
log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path);
return;
}
let separator = header.chars().nth(2).unwrap();
if separator != ' ' {
// the Gemini specification says that the third
// character has to be a space, so correct any
// other whitespace to it (e.g. tabs)
log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator);
}
let status = header.chars()
.take(2)
.collect::<String>()
.parse::<u8>()
// unwrap since we already checked it's a number
.unwrap();
// not taking a slice here because the separator
// might be a whitespace wider than a byte
let meta = header.chars().skip(3).collect::<String>();
PresetMeta::FullHeader(status, meta)
} else {
// must be a MIME type, but without status code
PresetMeta::FullMime(header.to_string())
};
self.file_meta.insert(path, preset);
} }
}); });
self.databases_read self.databases_read
@ -106,12 +170,12 @@ impl FileOptions {
/// The file path should consistently be either absolute or relative to the /// The file path should consistently be either absolute or relative to the
/// working/content directory. If inconsistent file paths are used, this can /// working/content directory. If inconsistent file paths are used, this can
/// lead to loading and storing sidecar files multiple times. /// lead to loading and storing sidecar files multiple times.
pub fn get(&mut self, file: &PathBuf) -> &str { pub fn get(&mut self, file: &PathBuf) -> PresetMeta {
let dir = file.parent().expect("no parent directory").to_path_buf(); let dir = file.parent().expect("no parent directory").to_path_buf();
if self.check_outdated(&dir) { if self.check_outdated(&dir) {
self.read_database(&dir); self.read_database(&dir);
} }
self.file_meta.get(file).unwrap_or(&self.default) self.file_meta.get(file).unwrap_or(&self.default).clone()
} }
} }