mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
initial commit
This commit is contained in:
parent
9fcd892d42
commit
d55c642eb6
11 changed files with 305 additions and 1 deletions
1
.github/FUNDING.yml
vendored
Normal file
1
.github/FUNDING.yml
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
custom: https://yggverse.github.io/#donate
|
||||
27
.github/workflows/build.yml
vendored
Normal file
27
.github/workflows/build.yml
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTFLAGS: -Dwarnings
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run rustfmt
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Run clippy
|
||||
run: cargo clippy --all-targets
|
||||
- name: Build
|
||||
run: cargo build --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --verbose
|
||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
Cargo.lock
|
||||
20
Cargo.toml
Normal file
20
Cargo.toml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "aquatic-crawler"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
description = "Crawler tool for the Aquatic BitTorrent tracker API"
|
||||
keywords = ["aquatic", "crawler", "parser", "bittorrent", "magnet"]
|
||||
categories = ["network-programming"]
|
||||
repository = "https://github.com/YGGverse/aquatic-crawler"
|
||||
# homepage = "https://yggverse.github.io"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
hyper-util = "0.1.14"
|
||||
librqbit = "8.1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
73
README.md
73
README.md
|
|
@ -1 +1,72 @@
|
|||
# aquatic-crawler
|
||||
# aquatic-crawler
|
||||
|
||||

|
||||
[](https://deps.rs/repo/github/YGGverse/aquatic-crawler)
|
||||
[](https://crates.io/crates/aquatic-crawler)
|
||||
|
||||
Crawler/aggregation tool for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker.
|
||||
|
||||
> [!NOTE]
|
||||
> Project in development!
|
||||
|
||||
## Roadmap
|
||||
|
||||
* Targets supported
|
||||
* [x] IPv4/IPv6 info-hash JSON/API (see [PR#233](https://github.com/greatest-ape/aquatic/pull/233))
|
||||
* [x] local file path
|
||||
* [ ] remote URL
|
||||
* Storage
|
||||
* [x] File system (dump as `.torrent`)
|
||||
    * [x] V1
|
||||
    * [ ] V2
|
||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
||||
* [ ] SQLite
|
||||
* Tools
|
||||
* [ ] Storage cleaner
|
||||
* [ ] Implement tests
|
||||
|
||||
## Install
|
||||
|
||||
1. `git clone https://github.com/YGGverse/aquatic-crawler.git && cd aquatic-crawler`
|
||||
2. `cargo build --release`
|
||||
3. `sudo install target/release/aquatic-crawler /usr/local/bin/aquatic-crawler`
|
||||
|
||||
## Usage
|
||||
|
||||
``` bash
|
||||
aquatic-crawler --infohash-source /path/to/info-hash-ipv4.json\
|
||||
--infohash-source /path/to/info-hash-ipv6.json\
|
||||
--infohash-source /path/to/another-source.json\
|
||||
--torrents-path /path/to/storage
|
||||
```
|
||||
* all arguments are optional, to support multiple source and target drivers
|
||||
* running without arguments does nothing!
|
||||
|
||||
### Options
|
||||
|
||||
``` bash
|
||||
Options:
|
||||
-d, --debug <DEBUG>
|
||||
Debug level
|
||||
|
||||
* `e` - error * `i` - info
|
||||
|
||||
[default: ei]
|
||||
|
||||
-i, --infohash-source <INFOHASH_SOURCE>
|
||||
Filepath(s) to the Aquatic tracker info-hash JSON/API (PR#233)
|
||||
|
||||
-t, --torrents-path <TORRENTS_PATH>
|
||||
Directory path to store the `.torrent` files
|
||||
|
||||
-s, --sleep <SLEEP>
|
||||
Crawl loop delay in seconds
|
||||
|
||||
[default: 300]
|
||||
|
||||
-h, --help
|
||||
Print help (see a summary with '-h')
|
||||
|
||||
-V, --version
|
||||
Print version
|
||||
```
|
||||
13
src/api.rs
Normal file
13
src/api.rs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/// Parse infohash from the source filepath,
|
||||
/// decode JSON to array on success
|
||||
pub fn infohashes(path: &str) -> anyhow::Result<Vec<String>> {
|
||||
let mut f = std::fs::File::open(path)?;
|
||||
let mut s = String::new();
|
||||
|
||||
use std::io::Read;
|
||||
f.read_to_string(&mut s)?;
|
||||
|
||||
let r: Vec<String> = serde_json::from_str(&s)?;
|
||||
|
||||
Ok(r)
|
||||
}
|
||||
26
src/argument.rs
Normal file
26
src/argument.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
use clap::Parser;

// Command-line arguments, parsed with clap's derive API.
// NOTE: the `///` doc comments below are emitted verbatim by clap as the
// `--help` text for each option — keep them user-facing.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
pub struct Argument {
    /// Debug level
    ///
    /// * `e` - error
    /// * `i` - info
    // Checked with `String::contains` in `main`, so any combination
    // ("ei", "e", "i", "") is accepted.
    #[arg(short, long, default_value_t = String::from("ei"))]
    pub debug: String,

    /// Filepath(s) to the Aquatic tracker info-hash JSON/API
    ///
    /// * PR #233 info-hash table implementation has multiple source tables for IPv4 and IPv6
    // Repeatable flag (`-i a.json -i b.json`); empty when not given.
    #[arg(short, long)]
    pub infohash_source: Vec<String>,

    /// Directory path to store the `.torrent` files
    // Optional: when absent, no file-system storage driver is initialized.
    #[arg(short, long)]
    pub torrents_path: Option<String>,

    /// Crawl loop delay in seconds
    #[arg(short, long, default_value_t = 300)]
    pub sleep: u64,
}
|
||||
1
src/database.rs
Normal file
1
src/database.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
// Storage drivers; currently only the file-system `.torrent` dump.
pub mod torrent;
|
||||
33
src/database/torrent.rs
Normal file
33
src/database/torrent.rs
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
use anyhow::{Result, bail};
|
||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
||||
|
||||
pub struct Storage(PathBuf);
|
||||
|
||||
impl Storage {
|
||||
pub fn init(storage: &str) -> Result<Self> {
|
||||
let p = PathBuf::from_str(storage)?;
|
||||
if fs::metadata(&p).is_ok_and(|t| t.is_file()) {
|
||||
bail!("Target destination is not directory!")
|
||||
}
|
||||
fs::create_dir_all(storage)?;
|
||||
Ok(Self(p))
|
||||
}
|
||||
|
||||
pub fn exists(&self, infohash: &str) -> bool {
|
||||
fs::metadata(self.filename(infohash)).is_ok_and(|p| p.is_file())
|
||||
}
|
||||
|
||||
pub fn save(&self, infohash: &str, bytes: &[u8]) -> Result<PathBuf> {
|
||||
let p = self.filename(infohash);
|
||||
let mut f = fs::File::create(&p)?;
|
||||
f.write_all(bytes)?;
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
fn filename(&self, infohash: &str) -> PathBuf {
|
||||
let mut p = PathBuf::new();
|
||||
p.push(&self.0);
|
||||
p.push(format!("{infohash}.torrent"));
|
||||
p
|
||||
}
|
||||
}
|
||||
19
src/debug.rs
Normal file
19
src/debug.rs
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
pub fn error(e: &anyhow::Error) {
|
||||
eprintln!(
|
||||
"[{}] [error] {e}",
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn info(message: String) {
|
||||
eprintln!(
|
||||
"[{}] [info] {message}",
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
)
|
||||
}
|
||||
91
src/main.rs
Normal file
91
src/main.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
mod api;
|
||||
mod argument;
|
||||
mod database;
|
||||
mod debug;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
use clap::Parser;
|
||||
let argument = argument::Argument::parse();
|
||||
|
||||
// calculate debug level once
|
||||
let is_debug_i = argument.debug.contains("i");
|
||||
let is_debug_e = argument.debug.contains("e");
|
||||
|
||||
// init shared members
|
||||
let torrent_storage = if let Some(t) = argument.torrents_path {
|
||||
Some(database::torrent::Storage::init(&t)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if is_debug_i {
|
||||
debug::info(String::from("Crawler started"));
|
||||
}
|
||||
|
||||
loop {
|
||||
if is_debug_i {
|
||||
debug::info(String::from("New index session begin..."));
|
||||
}
|
||||
let mut total = 0;
|
||||
let session = librqbit::Session::new(std::path::PathBuf::new()).await?;
|
||||
// collect info-hashes from API
|
||||
for source in &argument.infohash_source {
|
||||
if is_debug_i {
|
||||
debug::info(format!("Handle info-hash source `{source}`..."));
|
||||
}
|
||||
|
||||
// aquatic server may update the stats at this moment,
|
||||
// handle this state manually
|
||||
match api::infohashes(source) {
|
||||
Ok(infohashes) => {
|
||||
total += infohashes.len();
|
||||
for i in infohashes {
|
||||
if torrent_storage.as_ref().is_some_and(|s| !s.exists(&i)) {
|
||||
match session
|
||||
.add_torrent(
|
||||
librqbit::AddTorrent::from_url(format!(
|
||||
"magnet:?xt=urn:btih:{i}"
|
||||
)),
|
||||
Some(librqbit::AddTorrentOptions {
|
||||
list_only: true,
|
||||
..Default::default()
|
||||
}),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
librqbit::AddTorrentResponse::ListOnly(r) => {
|
||||
if let Some(ref s) = torrent_storage {
|
||||
let p = s.save(&i, &r.torrent_bytes)?;
|
||||
if is_debug_i {
|
||||
debug::info(format!(
|
||||
"Add new torrent file `{}`",
|
||||
p.to_string_lossy()
|
||||
));
|
||||
}
|
||||
}
|
||||
// @TODO
|
||||
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(ref e) => {
|
||||
if is_debug_e {
|
||||
debug::error(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
session.stop().await;
|
||||
if is_debug_i {
|
||||
debug::info(format!(
|
||||
"Index of {total} hashes completed, await {} seconds to continue...",
|
||||
argument.sleep,
|
||||
));
|
||||
}
|
||||
std::thread::sleep(std::time::Duration::from_secs(argument.sleep));
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue