Use the shared btracker-fs `crawler` feature for the preload implementation

This commit is contained in:
yggverse 2025-09-12 13:50:23 +03:00
parent e3e1dfd4c1
commit 3d51579354
3 changed files with 9 additions and 148 deletions

View file

@ -12,6 +12,7 @@ repository = "https://github.com/YGGverse/aquatic-crawler"
[dependencies]
anyhow = "1.0"
btracker-fs = { version = "0.3", features = ["crawler"] }
chrono = "0.4.41"
clap = { version = "4.5", features = ["derive"] }
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
@ -23,5 +24,8 @@ url = "2.5"
urlencoding = "2.1"
[patch.crates-io]
btracker-fs = { git = "https://github.com/yggverse/btracker-fs.git" }
#btracker-fs = { path = "../btracker-fs" }
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }

View file

@ -1,20 +1,19 @@
mod api;
mod config;
mod preload;
use anyhow::Result;
use config::Config;
use librqbit::{
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, Session, SessionOptions,
};
use preload::Preload;
use std::{collections::HashSet, num::NonZero, time::Duration};
use url::Url;
#[tokio::main]
async fn main() -> Result<()> {
use btracker_fs::crawler::Storage;
use chrono::Local;
use clap::Parser;
use config::Config;
use tokio::time;
// debug
if std::env::var("RUST_LOG").is_ok() {
@ -30,12 +29,13 @@ async fn main() -> Result<()> {
// init components
let time_init = Local::now();
let config = Config::parse();
let preload = Preload::init(
let preload = Storage::init(
config.preload,
config.preload_regex,
config.preload_max_filecount,
config.preload_max_filesize,
)?;
)
.unwrap();
let mut ban = HashSet::with_capacity(config.index_capacity);
log::info!("crawler started at {time_init}");

View file

@ -1,143 +0,0 @@
use anyhow::{Result, bail};
use regex::Regex;
use std::{collections::HashSet, fs, path::PathBuf};
/// Filesystem-backed storage for preloaded torrent content.
pub struct Preload {
    /// Optional cap on the number of files per torrent (enforced by callers)
    pub max_filecount: Option<usize>,
    /// Optional cap on a single file size, in bytes (enforced by callers)
    pub max_filesize: Option<u64>,
    /// Optional filename filter (enforced by callers)
    pub regex: Option<Regex>,
    /// Base directory all preload data lives under (see `init`)
    root: PathBuf,
}
impl Preload {
    // Constructors

    /// Construct a preload storage rooted at `root`.
    ///
    /// `root` is canonicalized before being stored so that the containment
    /// check in [`Self::commit`] (`starts_with` against canonicalized file
    /// paths) compares absolute paths consistently; previously the raw,
    /// possibly relative, path was kept and the check could fail spuriously.
    ///
    /// # Errors
    /// Fails when `root` cannot be canonicalized (e.g. does not exist)
    /// or is not a directory.
    pub fn init(
        root: PathBuf,
        regex: Option<Regex>,
        max_filecount: Option<usize>,
        max_filesize: Option<u64>,
    ) -> Result<Self> {
        // make sure given path is valid and exist; keep the canonical form
        let root = root.canonicalize()?;
        if !root.is_dir() {
            bail!("Preload root is not directory")
        }
        Ok(Self {
            max_filecount,
            max_filesize,
            regex,
            root,
        })
    }

    // Actions

    /// Persist torrent bytes and preloaded content,
    /// cleanup tmp data on success (see rqbit#408)
    ///
    /// * `info_hash` — info hash used as the permanent directory name
    /// * `torrent_bytes` — raw `.torrent` payload; written last so its
    ///   presence implies the preceding file moves succeeded
    /// * `persist_files` — torrent-relative paths to keep; `None` keeps none
    ///
    /// # Errors
    /// Fails on any filesystem error, or when a preloaded path resolves
    /// outside the preload root (path traversal in torrent file names).
    pub fn commit(
        &self,
        info_hash: &str,
        torrent_bytes: Vec<u8>,
        persist_files: Option<HashSet<PathBuf>>,
    ) -> Result<()> {
        // persist preloaded files: clear any previous permanent data first
        let permanent_dir = self.permanent_dir(info_hash, true)?;
        // init temporary path without creating the dir (delegate to `librqbit`)
        let tmp_dir = self.tmp_dir(info_hash, false)?;
        if let Some(files) = persist_files {
            let components_count = permanent_dir.components().count(); // count root offset once
            for file in files {
                // build the absolute path for the relative torrent filename
                let tmp_file = {
                    let mut p = PathBuf::from(&tmp_dir);
                    p.push(file);
                    p.canonicalize()?
                };
                // torrent file names are untrusted input: return an error
                // (instead of panicking) when a path escapes the preload
                // root through `..`/symlinks or resolves to a directory
                if !tmp_file.starts_with(&self.root) || tmp_file.is_dir() {
                    bail!(
                        "preload path `{}` is outside root `{}` or not a file",
                        tmp_file.to_string_lossy(),
                        self.root.to_string_lossy()
                    )
                }
                // build new permanent path /root/info-hash
                let mut permanent_file = PathBuf::from(&permanent_dir);
                for component in tmp_file.components().skip(components_count) {
                    permanent_file.push(component)
                }
                // make sure segments count is same to continue
                // (holds by construction; kept as an internal invariant)
                assert!(tmp_file.components().count() == permanent_file.components().count());
                // move `persist_files` from temporary to permanent location
                fs::create_dir_all(permanent_file.parent().unwrap())?;
                fs::rename(&tmp_file, &permanent_file)?;
                log::debug!(
                    "persist tmp file `{}` to `{}`",
                    tmp_file.to_string_lossy(),
                    permanent_file.to_string_lossy()
                );
            }
        }
        // cleanup temporary data
        if tmp_dir.exists() {
            fs::remove_dir_all(&tmp_dir)?;
            log::debug!("clean tmp data `{}`", tmp_dir.to_string_lossy())
        }
        // persist torrent bytes to file (on previous operations success)
        let torrent_file = self.torrent(info_hash);
        fs::write(&torrent_file, torrent_bytes)?;
        log::debug!(
            "persist torrent bytes for `{}`",
            torrent_file.to_string_lossy()
        );
        Ok(())
    }

    /// Build the absolute path to the temporary directory
    /// * optionally creates directory if not exists
    pub fn tmp_dir(&self, info_hash: &str, is_create: bool) -> Result<PathBuf> {
        let mut p = PathBuf::from(&self.root);
        p.push(tmp_component(info_hash));
        // internal invariant: the tmp slot must never be shadowed by a file
        assert!(!p.is_file());
        if is_create && !p.exists() {
            fs::create_dir(&p)?;
            log::debug!("create tmp directory `{}`", p.to_string_lossy())
        }
        Ok(p)
    }

    /// Build the absolute path to the permanent directory
    /// * optionally removes directory with its content
    fn permanent_dir(&self, info_hash: &str, is_clear: bool) -> Result<PathBuf> {
        let mut p = PathBuf::from(&self.root);
        p.push(info_hash);
        // internal invariant: the permanent slot must never be a plain file
        assert!(!p.is_file());
        if is_clear && p.exists() {
            // clean previous data
            fs::remove_dir_all(&p)?;
            log::debug!("clean previous data `{}`", p.to_string_lossy())
        }
        Ok(p)
    }

    // Getters

    /// Get root location for `Self`
    pub fn root(&self) -> &PathBuf {
        &self.root
    }

    /// Check the given hash is contain resolved torrent file
    pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
        Ok(fs::exists(self.torrent(info_hash))?)
    }

    /// Get absolute path to the torrent file (`<root>/<info_hash>.torrent`)
    fn torrent(&self, info_hash: &str) -> PathBuf {
        let mut p = PathBuf::from(&self.root);
        p.push(format!("{info_hash}.torrent"));
        assert!(!p.is_dir());
        p
    }
}
/// Build constant path component
/// (a temporary directory is named as the dot-prefixed info hash)
fn tmp_component(info_hash: &str) -> String {
    let mut name = String::with_capacity(info_hash.len() + 1);
    name.push('.');
    name.push_str(info_hash);
    name
}