mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
use shared btracker-fs / crawler feature for the preload implementation
This commit is contained in:
parent
e3e1dfd4c1
commit
3d51579354
3 changed files with 9 additions and 148 deletions
|
|
@ -12,6 +12,7 @@ repository = "https://github.com/YGGverse/aquatic-crawler"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
|
btracker-fs = { version = "0.3", features = ["crawler"] }
|
||||||
chrono = "0.4.41"
|
chrono = "0.4.41"
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
|
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
|
||||||
|
|
@ -23,5 +24,8 @@ url = "2.5"
|
||||||
urlencoding = "2.1"
|
urlencoding = "2.1"
|
||||||
|
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
|
btracker-fs = { git = "https://github.com/yggverse/btracker-fs.git" }
|
||||||
|
#btracker-fs = { path = "../btracker-fs" }
|
||||||
|
|
||||||
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
||||||
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }
|
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }
|
||||||
10
src/main.rs
10
src/main.rs
|
|
@ -1,20 +1,19 @@
|
||||||
mod api;
|
mod api;
|
||||||
mod config;
|
mod config;
|
||||||
mod preload;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use config::Config;
|
|
||||||
use librqbit::{
|
use librqbit::{
|
||||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, Session, SessionOptions,
|
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, Session, SessionOptions,
|
||||||
};
|
};
|
||||||
use preload::Preload;
|
|
||||||
use std::{collections::HashSet, num::NonZero, time::Duration};
|
use std::{collections::HashSet, num::NonZero, time::Duration};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
|
use btracker_fs::crawler::Storage;
|
||||||
use chrono::Local;
|
use chrono::Local;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use config::Config;
|
||||||
use tokio::time;
|
use tokio::time;
|
||||||
// debug
|
// debug
|
||||||
if std::env::var("RUST_LOG").is_ok() {
|
if std::env::var("RUST_LOG").is_ok() {
|
||||||
|
|
@ -30,12 +29,13 @@ async fn main() -> Result<()> {
|
||||||
// init components
|
// init components
|
||||||
let time_init = Local::now();
|
let time_init = Local::now();
|
||||||
let config = Config::parse();
|
let config = Config::parse();
|
||||||
let preload = Preload::init(
|
let preload = Storage::init(
|
||||||
config.preload,
|
config.preload,
|
||||||
config.preload_regex,
|
config.preload_regex,
|
||||||
config.preload_max_filecount,
|
config.preload_max_filecount,
|
||||||
config.preload_max_filesize,
|
config.preload_max_filesize,
|
||||||
)?;
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let mut ban = HashSet::with_capacity(config.index_capacity);
|
let mut ban = HashSet::with_capacity(config.index_capacity);
|
||||||
log::info!("crawler started at {time_init}");
|
log::info!("crawler started at {time_init}");
|
||||||
|
|
|
||||||
143
src/preload.rs
143
src/preload.rs
|
|
@ -1,143 +0,0 @@
|
||||||
use anyhow::{Result, bail};
|
|
||||||
use regex::Regex;
|
|
||||||
use std::{collections::HashSet, fs, path::PathBuf};
|
|
||||||
|
|
||||||
pub struct Preload {
|
|
||||||
root: PathBuf,
|
|
||||||
pub max_filecount: Option<usize>,
|
|
||||||
pub max_filesize: Option<u64>,
|
|
||||||
pub regex: Option<Regex>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Preload {
|
|
||||||
// Constructors
|
|
||||||
|
|
||||||
pub fn init(
|
|
||||||
root: PathBuf,
|
|
||||||
regex: Option<Regex>,
|
|
||||||
max_filecount: Option<usize>,
|
|
||||||
max_filesize: Option<u64>,
|
|
||||||
) -> Result<Self> {
|
|
||||||
// make sure given path is valid and exist
|
|
||||||
if !root.canonicalize()?.is_dir() {
|
|
||||||
bail!("Preload root is not directory")
|
|
||||||
}
|
|
||||||
Ok(Self {
|
|
||||||
max_filecount,
|
|
||||||
max_filesize,
|
|
||||||
regex,
|
|
||||||
root,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Actions
|
|
||||||
|
|
||||||
/// Persist torrent bytes and preloaded content,
|
|
||||||
/// cleanup tmp data on success (see rqbit#408)
|
|
||||||
pub fn commit(
|
|
||||||
&self,
|
|
||||||
info_hash: &str,
|
|
||||||
torrent_bytes: Vec<u8>,
|
|
||||||
persist_files: Option<HashSet<PathBuf>>,
|
|
||||||
) -> Result<()> {
|
|
||||||
// persist preloaded files
|
|
||||||
let permanent_dir = self.permanent_dir(info_hash, true)?;
|
|
||||||
// init temporary path without creating the dir (delegate to `librqbit`)
|
|
||||||
let tmp_dir = self.tmp_dir(info_hash, false)?;
|
|
||||||
if let Some(files) = persist_files {
|
|
||||||
let components_count = permanent_dir.components().count(); // count root offset once
|
|
||||||
for file in files {
|
|
||||||
// build the absolute path for the relative torrent filename
|
|
||||||
let tmp_file = {
|
|
||||||
let mut p = PathBuf::from(&tmp_dir);
|
|
||||||
p.push(file);
|
|
||||||
p.canonicalize()?
|
|
||||||
};
|
|
||||||
// make sure preload path is referring to the expected location
|
|
||||||
assert!(tmp_file.starts_with(&self.root) && !tmp_file.is_dir());
|
|
||||||
// build new permanent path /root/info-hash
|
|
||||||
let mut permanent_file = PathBuf::from(&permanent_dir);
|
|
||||||
for component in tmp_file.components().skip(components_count) {
|
|
||||||
permanent_file.push(component)
|
|
||||||
}
|
|
||||||
// make sure segments count is same to continue
|
|
||||||
assert!(tmp_file.components().count() == permanent_file.components().count());
|
|
||||||
// move `persist_files` from temporary to permanent location
|
|
||||||
fs::create_dir_all(permanent_file.parent().unwrap())?;
|
|
||||||
fs::rename(&tmp_file, &permanent_file)?;
|
|
||||||
log::debug!(
|
|
||||||
"persist tmp file `{}` to `{}`",
|
|
||||||
tmp_file.to_string_lossy(),
|
|
||||||
permanent_file.to_string_lossy()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// cleanup temporary data
|
|
||||||
if tmp_dir.exists() {
|
|
||||||
fs::remove_dir_all(&tmp_dir)?;
|
|
||||||
log::debug!("clean tmp data `{}`", tmp_dir.to_string_lossy())
|
|
||||||
}
|
|
||||||
// persist torrent bytes to file (on previous operations success)
|
|
||||||
let torrent_file = self.torrent(info_hash);
|
|
||||||
fs::write(&torrent_file, torrent_bytes)?;
|
|
||||||
log::debug!(
|
|
||||||
"persist torrent bytes for `{}`",
|
|
||||||
torrent_file.to_string_lossy()
|
|
||||||
);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Actions
|
|
||||||
|
|
||||||
/// Build the absolute path to the temporary directory
|
|
||||||
/// * optionally creates directory if not exists
|
|
||||||
pub fn tmp_dir(&self, info_hash: &str, is_create: bool) -> Result<PathBuf> {
|
|
||||||
let mut p = PathBuf::from(&self.root);
|
|
||||||
p.push(tmp_component(info_hash));
|
|
||||||
assert!(!p.is_file());
|
|
||||||
if is_create && !p.exists() {
|
|
||||||
fs::create_dir(&p)?;
|
|
||||||
log::debug!("create tmp directory `{}`", p.to_string_lossy())
|
|
||||||
}
|
|
||||||
Ok(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build the absolute path to the permanent directory
|
|
||||||
/// * optionally removes directory with its content
|
|
||||||
fn permanent_dir(&self, info_hash: &str, is_clear: bool) -> Result<PathBuf> {
|
|
||||||
let mut p = PathBuf::from(&self.root);
|
|
||||||
p.push(info_hash);
|
|
||||||
assert!(!p.is_file());
|
|
||||||
if is_clear && p.exists() {
|
|
||||||
// clean previous data
|
|
||||||
fs::remove_dir_all(&p)?;
|
|
||||||
log::debug!("clean previous data `{}`", p.to_string_lossy())
|
|
||||||
}
|
|
||||||
Ok(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Getters
|
|
||||||
|
|
||||||
/// Get root location for `Self`
|
|
||||||
pub fn root(&self) -> &PathBuf {
|
|
||||||
&self.root
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check the given hash is contain resolved torrent file
|
|
||||||
pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
|
|
||||||
Ok(fs::exists(self.torrent(info_hash))?)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get absolute path to the torrent file
|
|
||||||
fn torrent(&self, info_hash: &str) -> PathBuf {
|
|
||||||
let mut p = PathBuf::from(&self.root);
|
|
||||||
p.push(format!("{info_hash}.torrent"));
|
|
||||||
assert!(!p.is_dir());
|
|
||||||
p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build constant path component
|
|
||||||
fn tmp_component(info_hash: &str) -> String {
|
|
||||||
format!(".{info_hash}")
|
|
||||||
}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue