implement shared crawler feature

This commit is contained in:
yggverse 2025-09-12 13:47:15 +03:00
parent b3e5d59e68
commit dacf14c9d9
4 changed files with 162 additions and 4 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "btracker-fs"
version = "0.2.0"
version = "0.3.0"
edition = "2024"
license = "MIT"
readme = "README.md"
@ -13,7 +13,11 @@ repository = "https://github.com/yggverse/btracker-fs"
[features]
# default = ["public"]
public = []
crawler = []
[dependencies]
chrono = { version = "0.4.41", features = ["serde"] }
librqbit-core = "5.0.0"
log = "0.4.28"
regex = "1.11.2"

146
src/crawler.rs Normal file
View file

@ -0,0 +1,146 @@
//! Backend features for the following βtracker project components:
//!
//! * https://github.com/YGGverse/aquatic-crawler
use regex::Regex;
use std::{collections::HashSet, fs, io::Error, path::PathBuf};
/// Filesystem storage handle used by the crawler.
///
/// Holds the storage root directory together with optional crawl settings.
/// The settings are only stored here; nothing in this module interprets them.
#[derive(Debug, Clone)]
pub struct Storage {
    /// Storage root directory, kept in canonical form (see `Storage::init`)
    root: PathBuf,
    /// Optional file count limit
    /// NOTE(review): presumably the max number of files to preload per torrent;
    /// not read within this module, confirm against the crawler
    pub max_filecount: Option<usize>,
    /// Optional file size limit
    /// NOTE(review): presumably in bytes; not read within this module, confirm against the crawler
    pub max_filesize: Option<u64>,
    /// Optional regular expression
    /// NOTE(review): presumably a filter for torrent file names; not read within this module
    pub regex: Option<Regex>,
}
impl Storage {
    // Constructors

    /// Create new `Self` for the existing directory `root`
    ///
    /// * `root` is stored in its canonical form
    /// * `regex`, `max_filecount` and `max_filesize` are stored as given
    ///
    /// # Errors
    ///
    /// Returns a message if `root` is not a directory or cannot be canonicalized.
    pub fn init(
        root: PathBuf,
        regex: Option<Regex>,
        max_filecount: Option<usize>,
        max_filesize: Option<u64>,
    ) -> Result<Self, String> {
        // make sure given path is valid and exist
        if !root.is_dir() {
            return Err("Storage root is not directory".into());
        }
        Ok(Self {
            max_filecount,
            max_filesize,
            regex,
            root: root.canonicalize().map_err(|e| e.to_string())?,
        })
    }

    // Actions

    /// Persist torrent bytes and preloaded content,
    /// cleanup tmp data on success (see rqbit#408)
    ///
    /// Steps, in order:
    /// 1. remove previous permanent directory `{root}/{info_hash}` (if any)
    /// 2. move every `persist_files` entry (path relative to the temporary
    ///    directory `{root}/.{info_hash}`) to the same relative location
    ///    inside the permanent directory
    /// 3. remove the temporary directory (if exists)
    /// 4. write `torrent_bytes` to `{root}/{info_hash}.torrent`
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error of any failed step, or an error when
    /// a path does not match the expected layout: a preloaded entry resolves
    /// outside of the temporary directory or is a directory, the temporary
    /// directory resolves outside of the storage root, or one of the target
    /// locations is occupied by an entry of unexpected type.
    pub fn commit(
        &self,
        info_hash: &str,
        torrent_bytes: Vec<u8>,
        persist_files: Option<HashSet<PathBuf>>,
    ) -> Result<(), Error> {
        // persist preloaded files
        let permanent_dir = self.permanent_dir(info_hash, true)?;
        // init temporary path without creating the dir (delegate to `librqbit`)
        let tmp_dir = self.tmp_dir(info_hash, false)?;
        if let Some(files) = persist_files.filter(|f| !f.is_empty()) {
            // resolve the temporary directory once, so every file is validated
            // against this torrent's own location and not just the shared root
            let tmp_root = tmp_dir.canonicalize()?;
            if !tmp_root.starts_with(&self.root) {
                return Err(Self::layout_error(
                    "temporary directory is outside of the storage root",
                    &tmp_root,
                ));
            }
            for file in files {
                // build the absolute path for the relative torrent filename
                let tmp_file = tmp_dir.join(file).canonicalize()?;
                if tmp_file.is_dir() {
                    return Err(Self::layout_error(
                        "preloaded path is a directory",
                        &tmp_file,
                    ));
                }
                // make sure preload path is referring to the expected location:
                // `..` segments, absolute names or symlinks must not escape
                // the temporary directory of this torrent
                let relative = tmp_file.strip_prefix(&tmp_root).map_err(|_| {
                    Self::layout_error(
                        "preloaded file is outside of the temporary directory",
                        &tmp_file,
                    )
                })?;
                // build new permanent path /root/info-hash
                let permanent_file = permanent_dir.join(relative);
                // move `persist_files` from temporary to permanent location
                if let Some(parent) = permanent_file.parent() {
                    fs::create_dir_all(parent)?;
                }
                fs::rename(&tmp_file, &permanent_file)?;
                log::debug!(
                    "persist tmp file `{}` to `{}`",
                    tmp_file.to_string_lossy(),
                    permanent_file.to_string_lossy()
                );
            }
        }
        // cleanup temporary data
        if tmp_dir.exists() {
            fs::remove_dir_all(&tmp_dir)?;
            log::debug!("clean tmp data `{}`", tmp_dir.to_string_lossy())
        }
        // persist torrent bytes to file (on previous operations success)
        let torrent_file = self.torrent(info_hash)?;
        fs::write(&torrent_file, torrent_bytes)?;
        log::debug!(
            "persist torrent bytes for `{}`",
            torrent_file.to_string_lossy()
        );
        Ok(())
    }

    // Paths

    /// Build the absolute path to the temporary directory
    /// * optionally creates directory if not exists
    ///
    /// # Errors
    ///
    /// Returns an error if the location is occupied by a regular file
    /// or the directory could not be created.
    pub fn tmp_dir(&self, info_hash: &str, is_create: bool) -> Result<PathBuf, Error> {
        let p = self.root.join(tmp_component(info_hash));
        if p.is_file() {
            return Err(Self::layout_error(
                "temporary directory location is a file",
                &p,
            ));
        }
        if is_create && !p.exists() {
            fs::create_dir(&p)?;
            log::debug!("create tmp directory `{}`", p.to_string_lossy())
        }
        Ok(p)
    }

    /// Build the absolute path to the permanent directory
    /// * optionally removes directory with its content
    ///
    /// # Errors
    ///
    /// Returns an error if the location is occupied by a regular file
    /// or the previous data could not be removed.
    fn permanent_dir(&self, info_hash: &str, is_clear: bool) -> Result<PathBuf, Error> {
        let p = self.root.join(info_hash);
        if p.is_file() {
            return Err(Self::layout_error(
                "permanent directory location is a file",
                &p,
            ));
        }
        if is_clear && p.exists() {
            // clean previous data
            fs::remove_dir_all(&p)?;
            log::debug!("clean previous data `{}`", p.to_string_lossy())
        }
        Ok(p)
    }

    // Getters

    /// Get root location for `Self`
    pub fn root(&self) -> &PathBuf {
        &self.root
    }

    /// Check whether the torrent file for the given info-hash exists
    ///
    /// # Errors
    ///
    /// Returns an error if the torrent file location is a directory
    /// or its existence could not be determined.
    pub fn contains_torrent(&self, info_hash: &str) -> Result<bool, Error> {
        fs::exists(self.torrent(info_hash)?)
    }

    /// Get absolute path to the torrent file (`{root}/{info_hash}.torrent`)
    ///
    /// # Errors
    ///
    /// Returns an error if the location is occupied by a directory.
    fn torrent(&self, info_hash: &str) -> Result<PathBuf, Error> {
        let p = self.root.join(format!("{info_hash}.torrent"));
        if p.is_dir() {
            return Err(Self::layout_error(
                "torrent file location is a directory",
                &p,
            ));
        }
        Ok(p)
    }

    // Helpers

    /// Build an I/O error for a path that does not match the expected storage layout
    fn layout_error(reason: &str, path: &std::path::Path) -> Error {
        Error::other(format!("{reason}: `{}`", path.to_string_lossy()))
    }
}
/// Name of the temporary directory for the given info-hash:
/// the hash itself prefixed with a single dot
fn tmp_component(info_hash: &str) -> String {
    let mut name = String::with_capacity(info_hash.len() + 1);
    name.push('.');
    name.push_str(info_hash);
    name
}

View file

@ -1,2 +1,5 @@
#[cfg(feature = "public")]
pub mod public;
#[cfg(feature = "crawler")]
pub mod crawler;

View file

@ -1,3 +1,8 @@
//! Frontend features for the following βtracker project components:
//!
//! * https://github.com/YGGverse/btracker
//! * https://github.com/YGGverse/btracker-gemini
use chrono::{DateTime, Utc};
use std::{fs, io::Error, path::PathBuf, time::SystemTime};
@ -19,13 +24,13 @@ pub struct Torrent {
pub time: DateTime<Utc>,
}
pub struct Public {
pub struct Storage {
default_capacity: usize,
pub default_limit: usize,
root: PathBuf,
}
impl Public {
impl Storage {
// Constructors
pub fn init(