mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
separate torrent storage features
This commit is contained in:
parent
25a226eb0f
commit
b30be0e9f8
5 changed files with 56 additions and 48 deletions
|
|
@ -23,7 +23,7 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke
|
||||||
* [ ] remote URL
|
* [ ] remote URL
|
||||||
* Export options
|
* Export options
|
||||||
* [x] File system (`--storage`)
|
* [x] File system (`--storage`)
|
||||||
* [x] resolve infohash to the `.torrent` file (`--save-torrents`)
|
* [x] resolve infohash to the `.torrent` file (`--export-torrents`)
|
||||||
* [x] download content files that match the regex pattern (`--preload-regex`)
|
* [x] download content files that match the regex pattern (`--preload-regex`)
|
||||||
* [x] RSS feed (includes resolved torrent meta and magnet links to download)
|
* [x] RSS feed (includes resolved torrent meta and magnet links to download)
|
||||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
||||||
|
|
@ -76,6 +76,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
--initial-peer <INITIAL_PEER>
|
--initial-peer <INITIAL_PEER>
|
||||||
Define initial peer(s) to preload the `.torrent` files info
|
Define initial peer(s) to preload the `.torrent` files info
|
||||||
|
|
||||||
|
--export-torrents <EXPORT_TORRENTS>
|
||||||
|
Save resolved torrent files to given directory
|
||||||
|
|
||||||
--export-rss <EXPORT_RSS>
|
--export-rss <EXPORT_RSS>
|
||||||
File path to export RSS feed
|
File path to export RSS feed
|
||||||
|
|
||||||
|
|
@ -121,9 +124,6 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
|
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
|
||||||
Max count of preloaded files per torrent (match `preload_regex`)
|
Max count of preloaded files per torrent (match `preload_regex`)
|
||||||
|
|
||||||
--save-torrents
|
|
||||||
Save resolved torrent files to the `storage` location
|
|
||||||
|
|
||||||
--proxy-url <PROXY_URL>
|
--proxy-url <PROXY_URL>
|
||||||
Use `socks5://[username:password@]host:port`
|
Use `socks5://[username:password@]host:port`
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,10 @@ pub struct Argument {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub initial_peer: Vec<String>,
|
pub initial_peer: Vec<String>,
|
||||||
|
|
||||||
|
/// Save resolved torrent files to given directory
|
||||||
|
#[arg(long)]
|
||||||
|
pub export_torrents: Option<String>,
|
||||||
|
|
||||||
/// File path to export RSS feed
|
/// File path to export RSS feed
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub export_rss: Option<String>,
|
pub export_rss: Option<String>,
|
||||||
|
|
@ -91,10 +95,6 @@ pub struct Argument {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub preload_max_filecount: Option<usize>,
|
pub preload_max_filecount: Option<usize>,
|
||||||
|
|
||||||
/// Save resolved torrent files to the `storage` location
|
|
||||||
#[arg(long, default_value_t = false)]
|
|
||||||
pub save_torrents: bool,
|
|
||||||
|
|
||||||
/// Use `socks5://[username:password@]host:port`
|
/// Use `socks5://[username:password@]host:port`
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub proxy_url: Option<String>,
|
pub proxy_url: Option<String>,
|
||||||
|
|
|
||||||
35
src/main.rs
35
src/main.rs
|
|
@ -5,6 +5,7 @@ mod index;
|
||||||
mod peers;
|
mod peers;
|
||||||
mod rss;
|
mod rss;
|
||||||
mod storage;
|
mod storage;
|
||||||
|
mod torrent;
|
||||||
mod trackers;
|
mod trackers;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
@ -13,6 +14,7 @@ use index::Index;
|
||||||
use rss::Rss;
|
use rss::Rss;
|
||||||
use std::{collections::HashSet, num::NonZero, time::Duration};
|
use std::{collections::HashSet, num::NonZero, time::Duration};
|
||||||
use storage::Storage;
|
use storage::Storage;
|
||||||
|
use torrent::Torrent;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|
@ -30,6 +32,7 @@ async fn main() -> Result<()> {
|
||||||
let peers = peers::Peers::init(&arg.initial_peer)?;
|
let peers = peers::Peers::init(&arg.initial_peer)?;
|
||||||
let storage = Storage::init(&arg.storage, arg.clear)?;
|
let storage = Storage::init(&arg.storage, arg.clear)?;
|
||||||
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
|
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
|
||||||
|
let torrent = arg.export_torrents.map(|p| Torrent::init(&p).unwrap());
|
||||||
let preload_regex = arg.preload_regex.map(|ref r| regex::Regex::new(r).unwrap());
|
let preload_regex = arg.preload_regex.map(|ref r| regex::Regex::new(r).unwrap());
|
||||||
let session = librqbit::Session::new_with_opts(
|
let session = librqbit::Session::new_with_opts(
|
||||||
storage.path(),
|
storage.path(),
|
||||||
|
|
@ -140,17 +143,9 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// dump info-hash to the torrent file
|
if let Some(ref t) = torrent {
|
||||||
if arg.save_torrents {
|
save_torrent_file(t, &debug, &i, &m.torrent_bytes)
|
||||||
save_torrent_file(
|
|
||||||
&storage,
|
|
||||||
&debug,
|
|
||||||
&i,
|
|
||||||
&m.torrent_bytes,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
// @TODO
|
|
||||||
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
|
||||||
m.info.name.as_ref().map(|n|n.to_string())
|
m.info.name.as_ref().map(|n|n.to_string())
|
||||||
})?;
|
})?;
|
||||||
session.update_only_files(&mt, &only_files).await?;
|
session.update_only_files(&mt, &only_files).await?;
|
||||||
|
|
@ -167,8 +162,8 @@ async fn main() -> Result<()> {
|
||||||
index.insert(i, only_files_size, name)
|
index.insert(i, only_files_size, name)
|
||||||
}
|
}
|
||||||
Ok(AddTorrentResponse::ListOnly(r)) => {
|
Ok(AddTorrentResponse::ListOnly(r)) => {
|
||||||
if arg.save_torrents {
|
if let Some(ref t) = torrent {
|
||||||
save_torrent_file(&storage, &debug, &i, &r.torrent_bytes)
|
save_torrent_file(t, &debug, &i, &r.torrent_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// @TODO
|
// @TODO
|
||||||
|
|
@ -220,14 +215,14 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_torrent_file(s: &Storage, d: &Debug, i: &str, b: &[u8]) {
|
/// Shared handler function to save resolved torrents as file
|
||||||
if s.torrent_exists(i) {
|
fn save_torrent_file(t: &Torrent, d: &Debug, i: &str, b: &[u8]) {
|
||||||
d.info(&format!("Torrent file `{i}` already exists, skip"))
|
match t.persist(i, b) {
|
||||||
} else {
|
Ok(r) => match r {
|
||||||
match s.save_torrent(i, b) {
|
Some(p) => d.info(&format!("Add torrent file `{}`", p.to_string_lossy())),
|
||||||
Ok(r) => d.info(&format!("Add torrent file `{}`", r.to_string_lossy())),
|
None => d.info(&format!("Torrent file `{i}` already exists")),
|
||||||
Err(e) => d.error(&e.to_string()),
|
},
|
||||||
}
|
Err(e) => d.error(&format!("Error on save torrent file `{i}`: {e}")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
use std::{fs, path::PathBuf, str::FromStr};
|
||||||
|
|
||||||
pub struct Storage(PathBuf);
|
pub struct Storage(PathBuf);
|
||||||
|
|
||||||
|
|
@ -25,18 +25,6 @@ impl Storage {
|
||||||
Ok(Self(p))
|
Ok(Self(p))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn torrent_exists(&self, infohash: &str) -> bool {
|
|
||||||
fs::metadata(self.torrent(infohash))
|
|
||||||
.is_ok_and(|p| p.is_file() || p.is_dir() || p.is_symlink())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn save_torrent(&self, infohash: &str, bytes: &[u8]) -> Result<PathBuf> {
|
|
||||||
let p = self.torrent(infohash);
|
|
||||||
let mut f = fs::File::create(&p)?;
|
|
||||||
f.write_all(bytes)?;
|
|
||||||
Ok(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn output_folder(&self, infohash: &str, create: bool) -> Result<String> {
|
pub fn output_folder(&self, infohash: &str, create: bool) -> Result<String> {
|
||||||
let mut p = PathBuf::new();
|
let mut p = PathBuf::new();
|
||||||
p.push(&self.0);
|
p.push(&self.0);
|
||||||
|
|
@ -76,11 +64,4 @@ impl Storage {
|
||||||
pub fn path(&self) -> PathBuf {
|
pub fn path(&self) -> PathBuf {
|
||||||
self.0.clone()
|
self.0.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn torrent(&self, infohash: &str) -> PathBuf {
|
|
||||||
let mut p = PathBuf::new();
|
|
||||||
p.push(&self.0);
|
|
||||||
p.push(format!("{infohash}.torrent"));
|
|
||||||
p
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
32
src/torrent.rs
Normal file
32
src/torrent.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
||||||
|
|
||||||
|
pub struct Torrent {
|
||||||
|
storage: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Torrent {
|
||||||
|
pub fn init(path: &str) -> Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
storage: PathBuf::from_str(path)?.canonicalize()?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn persist(&self, infohash: &str, data: &[u8]) -> Result<Option<PathBuf>> {
|
||||||
|
Ok(if self.path(infohash).exists() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let p = self.path(infohash);
|
||||||
|
let mut f = fs::File::create(&p)?;
|
||||||
|
f.write_all(data)?;
|
||||||
|
Some(p)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path(&self, infohash: &str) -> PathBuf {
|
||||||
|
let mut p = PathBuf::new();
|
||||||
|
p.push(&self.storage);
|
||||||
|
p.push(format!("{infohash}.torrent"));
|
||||||
|
p
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue