separate torrent storage features

This commit is contained in:
yggverse 2025-07-07 01:43:15 +03:00
parent 25a226eb0f
commit b30be0e9f8
5 changed files with 56 additions and 48 deletions

View file

@ -23,7 +23,7 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke
* [ ] remote URL
* Export options
* [x] File system (`--storage`)
* [x] resolve infohash to the `.torrent` file (`--save-torrents`)
* [x] resolve infohash to the `.torrent` file (`--export-torrents`)
* [x] download content files matching the regex pattern (`--preload-regex`)
* [x] RSS feed (includes resolved torrent meta and magnet links to download)
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
@ -76,6 +76,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
--initial-peer <INITIAL_PEER>
Define initial peer(s) to preload the `.torrent` files info
--export-torrents <EXPORT_TORRENTS>
Save resolved torrent files to given directory
--export-rss <EXPORT_RSS>
File path to export RSS feed
@ -121,9 +124,6 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
Max count of preloaded files per torrent (match `preload_regex`)
--save-torrents
Save resolved torrent files to the `storage` location
--proxy-url <PROXY_URL>
Use `socks5://[username:password@]host:port`

View file

@ -33,6 +33,10 @@ pub struct Argument {
#[arg(long)]
pub initial_peer: Vec<String>,
/// Save resolved torrent files to given directory
#[arg(long)]
pub export_torrents: Option<String>,
/// File path to export RSS feed
#[arg(long)]
pub export_rss: Option<String>,
@ -91,10 +95,6 @@ pub struct Argument {
#[arg(long)]
pub preload_max_filecount: Option<usize>,
/// Save resolved torrent files to the `storage` location
#[arg(long, default_value_t = false)]
pub save_torrents: bool,
/// Use `socks5://[username:password@]host:port`
#[arg(long)]
pub proxy_url: Option<String>,

View file

@ -5,6 +5,7 @@ mod index;
mod peers;
mod rss;
mod storage;
mod torrent;
mod trackers;
use anyhow::Result;
@ -13,6 +14,7 @@ use index::Index;
use rss::Rss;
use std::{collections::HashSet, num::NonZero, time::Duration};
use storage::Storage;
use torrent::Torrent;
use url::Url;
#[tokio::main]
@ -30,6 +32,7 @@ async fn main() -> Result<()> {
let peers = peers::Peers::init(&arg.initial_peer)?;
let storage = Storage::init(&arg.storage, arg.clear)?;
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
let torrent = arg.export_torrents.map(|p| Torrent::init(&p).unwrap());
let preload_regex = arg.preload_regex.map(|ref r| regex::Regex::new(r).unwrap());
let session = librqbit::Session::new_with_opts(
storage.path(),
@ -140,17 +143,9 @@ async fn main() -> Result<()> {
}
}
}
// dump info-hash to the torrent file
if arg.save_torrents {
save_torrent_file(
&storage,
&debug,
&i,
&m.torrent_bytes,
)
if let Some(ref t) = torrent {
save_torrent_file(t, &debug, &i, &m.torrent_bytes)
}
// @TODO
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
m.info.name.as_ref().map(|n|n.to_string())
})?;
session.update_only_files(&mt, &only_files).await?;
@ -167,8 +162,8 @@ async fn main() -> Result<()> {
index.insert(i, only_files_size, name)
}
Ok(AddTorrentResponse::ListOnly(r)) => {
if arg.save_torrents {
save_torrent_file(&storage, &debug, &i, &r.torrent_bytes)
if let Some(ref t) = torrent {
save_torrent_file(t, &debug, &i, &r.torrent_bytes)
}
// @TODO
@ -220,14 +215,14 @@ async fn main() -> Result<()> {
}
}
fn save_torrent_file(s: &Storage, d: &Debug, i: &str, b: &[u8]) {
if s.torrent_exists(i) {
d.info(&format!("Torrent file `{i}` already exists, skip"))
} else {
match s.save_torrent(i, b) {
Ok(r) => d.info(&format!("Add torrent file `{}`", r.to_string_lossy())),
Err(e) => d.error(&e.to_string()),
}
/// Persist the raw bytes `b` of a resolved torrent under info-hash `i` via `t`
/// and report the outcome (created / already present / failed) through `d`.
///
/// Failures are only logged; nothing is returned to the caller.
fn save_torrent_file(t: &Torrent, d: &Debug, i: &str, b: &[u8]) {
    let outcome = t.persist(i, b);
    match outcome {
        Err(cause) => d.error(&format!("Error on save torrent file `{i}`: {cause}")),
        Ok(None) => d.info(&format!("Torrent file `{i}` already exists")),
        Ok(Some(written)) => d.info(&format!(
            "Add torrent file `{}`",
            written.to_string_lossy()
        )),
    }
}

View file

@ -1,5 +1,5 @@
use anyhow::{Result, bail};
use std::{fs, io::Write, path::PathBuf, str::FromStr};
use std::{fs, path::PathBuf, str::FromStr};
pub struct Storage(PathBuf);
@ -25,18 +25,6 @@ impl Storage {
Ok(Self(p))
}
pub fn torrent_exists(&self, infohash: &str) -> bool {
fs::metadata(self.torrent(infohash))
.is_ok_and(|p| p.is_file() || p.is_dir() || p.is_symlink())
}
pub fn save_torrent(&self, infohash: &str, bytes: &[u8]) -> Result<PathBuf> {
let p = self.torrent(infohash);
let mut f = fs::File::create(&p)?;
f.write_all(bytes)?;
Ok(p)
}
pub fn output_folder(&self, infohash: &str, create: bool) -> Result<String> {
let mut p = PathBuf::new();
p.push(&self.0);
@ -76,11 +64,4 @@ impl Storage {
/// Return an owned copy of the wrapped storage path.
pub fn path(&self) -> PathBuf {
    self.0.to_path_buf()
}
fn torrent(&self, infohash: &str) -> PathBuf {
let mut p = PathBuf::new();
p.push(&self.0);
p.push(format!("{infohash}.torrent"));
p
}
}

32
src/torrent.rs Normal file
View file

@ -0,0 +1,32 @@
use anyhow::Result;
use std::{fs, io::Write, path::PathBuf, str::FromStr};
/// File-system exporter that stores resolved `.torrent` files in one directory.
pub struct Torrent {
    /// Canonicalized export location (see [`Torrent::init`]).
    storage: PathBuf,
}

impl Torrent {
    /// Create an exporter that writes into the location given by `path`.
    ///
    /// # Errors
    /// Returns an error when `path` cannot be canonicalized
    /// (for example, when it does not exist).
    pub fn init(path: &str) -> Result<Self> {
        Ok(Self {
            storage: PathBuf::from_str(path)?.canonicalize()?,
        })
    }

    /// Write `data` to `<storage>/<infohash>.torrent` unless that path is
    /// already taken.
    ///
    /// Returns `Ok(Some(path))` when a new file was created and `Ok(None)`
    /// when something already exists at the target path; existing content is
    /// never overwritten.
    ///
    /// # Errors
    /// Returns any other I/O error raised while creating or writing the file.
    pub fn persist(&self, infohash: &str, data: &[u8]) -> Result<Option<PathBuf>> {
        let p = self.path(infohash);
        // `create_new` makes the existence check and the creation one atomic
        // step, so a file that shows up concurrently is never truncated.
        let mut f = match fs::OpenOptions::new().write(true).create_new(true).open(&p) {
            Ok(f) => f,
            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => return Ok(None),
            Err(e) => return Err(e.into()),
        };
        if let Err(e) = f.write_all(data) {
            // Do not leave a partial file behind: it would be reported as
            // "already exists" on every later attempt. Cleanup is best-effort.
            drop(f);
            let _ = fs::remove_file(&p);
            return Err(e.into());
        }
        Ok(Some(p))
    }

    /// Build the target file path for `infohash` inside the export location.
    fn path(&self, infohash: &str) -> PathBuf {
        self.storage.join(format!("{infohash}.torrent"))
    }
}