mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-04-01 01:25:36 +00:00
initial commit
This commit is contained in:
parent
9fcd892d42
commit
d55c642eb6
11 changed files with 305 additions and 1 deletions
13
src/api.rs
Normal file
13
src/api.rs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/// Parse infohash from the source filepath,
|
||||
/// decode JSON to array on success
|
||||
pub fn infohashes(path: &str) -> anyhow::Result<Vec<String>> {
|
||||
let mut f = std::fs::File::open(path)?;
|
||||
let mut s = String::new();
|
||||
|
||||
use std::io::Read;
|
||||
f.read_to_string(&mut s)?;
|
||||
|
||||
let r: Vec<String> = serde_json::from_str(&s)?;
|
||||
|
||||
Ok(r)
|
||||
}
|
||||
26
src/argument.rs
Normal file
26
src/argument.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
use clap::Parser;
|
||||
|
||||
// CLI arguments parsed by `clap` at startup.
// NOTE: the `///` doc comments below double as the generated `--help`
// text, so they are user-facing strings and must not be reworded casually.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
pub struct Argument {
    /// Debug level
    ///
    /// * `e` - error
    /// * `i` - info
    // any combination of the letters above; the default "ei" enables both
    #[arg(short, long, default_value_t = String::from("ei"))]
    pub debug: String,

    /// Filepath(s) to the Aquatic tracker info-hash JSON/API
    ///
    /// * PR #233 info-hash table implementation has multiple source tables for IPv4 and IPv6
    // repeatable flag: each `-i <path>` appends one source file
    #[arg(short, long)]
    pub infohash_source: Vec<String>,

    /// Directory path to store the `.torrent` files
    // optional: when absent, crawled torrent metadata is not persisted
    #[arg(short, long)]
    pub torrents_path: Option<String>,

    /// Crawl loop delay in seconds
    #[arg(short, long, default_value_t = 300)]
    pub sleep: u64,
}
|
||||
1
src/database.rs
Normal file
1
src/database.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub mod torrent;
|
||||
33
src/database/torrent.rs
Normal file
33
src/database/torrent.rs
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
use anyhow::{Result, bail};
|
||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
||||
|
||||
pub struct Storage(PathBuf);
|
||||
|
||||
impl Storage {
|
||||
pub fn init(storage: &str) -> Result<Self> {
|
||||
let p = PathBuf::from_str(storage)?;
|
||||
if fs::metadata(&p).is_ok_and(|t| t.is_file()) {
|
||||
bail!("Target destination is not directory!")
|
||||
}
|
||||
fs::create_dir_all(storage)?;
|
||||
Ok(Self(p))
|
||||
}
|
||||
|
||||
pub fn exists(&self, infohash: &str) -> bool {
|
||||
fs::metadata(self.filename(infohash)).is_ok_and(|p| p.is_file())
|
||||
}
|
||||
|
||||
pub fn save(&self, infohash: &str, bytes: &[u8]) -> Result<PathBuf> {
|
||||
let p = self.filename(infohash);
|
||||
let mut f = fs::File::create(&p)?;
|
||||
f.write_all(bytes)?;
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
fn filename(&self, infohash: &str) -> PathBuf {
|
||||
let mut p = PathBuf::new();
|
||||
p.push(&self.0);
|
||||
p.push(format!("{infohash}.torrent"));
|
||||
p
|
||||
}
|
||||
}
|
||||
19
src/debug.rs
Normal file
19
src/debug.rs
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
pub fn error(e: &anyhow::Error) {
|
||||
eprintln!(
|
||||
"[{}] [error] {e}",
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn info(message: String) {
|
||||
eprintln!(
|
||||
"[{}] [info] {message}",
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
)
|
||||
}
|
||||
91
src/main.rs
Normal file
91
src/main.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
mod api;
|
||||
mod argument;
|
||||
mod database;
|
||||
mod debug;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
use clap::Parser;
|
||||
let argument = argument::Argument::parse();
|
||||
|
||||
// calculate debug level once
|
||||
let is_debug_i = argument.debug.contains("i");
|
||||
let is_debug_e = argument.debug.contains("e");
|
||||
|
||||
// init shared members
|
||||
let torrent_storage = if let Some(t) = argument.torrents_path {
|
||||
Some(database::torrent::Storage::init(&t)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if is_debug_i {
|
||||
debug::info(String::from("Crawler started"));
|
||||
}
|
||||
|
||||
loop {
|
||||
if is_debug_i {
|
||||
debug::info(String::from("New index session begin..."));
|
||||
}
|
||||
let mut total = 0;
|
||||
let session = librqbit::Session::new(std::path::PathBuf::new()).await?;
|
||||
// collect info-hashes from API
|
||||
for source in &argument.infohash_source {
|
||||
if is_debug_i {
|
||||
debug::info(format!("Handle info-hash source `{source}`..."));
|
||||
}
|
||||
|
||||
// aquatic server may update the stats at this moment,
|
||||
// handle this state manually
|
||||
match api::infohashes(source) {
|
||||
Ok(infohashes) => {
|
||||
total += infohashes.len();
|
||||
for i in infohashes {
|
||||
if torrent_storage.as_ref().is_some_and(|s| !s.exists(&i)) {
|
||||
match session
|
||||
.add_torrent(
|
||||
librqbit::AddTorrent::from_url(format!(
|
||||
"magnet:?xt=urn:btih:{i}"
|
||||
)),
|
||||
Some(librqbit::AddTorrentOptions {
|
||||
list_only: true,
|
||||
..Default::default()
|
||||
}),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
librqbit::AddTorrentResponse::ListOnly(r) => {
|
||||
if let Some(ref s) = torrent_storage {
|
||||
let p = s.save(&i, &r.torrent_bytes)?;
|
||||
if is_debug_i {
|
||||
debug::info(format!(
|
||||
"Add new torrent file `{}`",
|
||||
p.to_string_lossy()
|
||||
));
|
||||
}
|
||||
}
|
||||
// @TODO
|
||||
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(ref e) => {
|
||||
if is_debug_e {
|
||||
debug::error(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
session.stop().await;
|
||||
if is_debug_i {
|
||||
debug::info(format!(
|
||||
"Index of {total} hashes completed, await {} seconds to continue...",
|
||||
argument.sleep,
|
||||
));
|
||||
}
|
||||
std::thread::sleep(std::time::Duration::from_secs(argument.sleep));
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue