mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
remove extra features
This commit is contained in:
parent
1395257882
commit
8cbae5019d
12 changed files with 153 additions and 972 deletions
10
Cargo.toml
10
Cargo.toml
|
|
@ -4,25 +4,25 @@ version = "0.2.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
description = "SSD-friendly crawler for the Aquatic BitTorrent tracker based on librqbit API"
|
description = "SSD-friendly crawler for the Aquatic BitTorrent tracker, based on the librqbit API"
|
||||||
keywords = ["aquatic", "librqbit", "rqbit", "crawler", "bittorrent"]
|
keywords = ["aquatic", "librqbit", "bittorrent", "crawler", "resolver"]
|
||||||
categories = ["network-programming"]
|
categories = ["network-programming"]
|
||||||
repository = "https://github.com/YGGverse/aquatic-crawler"
|
repository = "https://github.com/YGGverse/aquatic-crawler"
|
||||||
# homepage = "https://yggverse.github.io"
|
# homepage = "https://yggverse.github.io"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
chrono = "0.4.41"
|
chrono = "0.4"
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
hyper-util = "0.1"
|
|
||||||
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
|
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
|
||||||
|
log = "0.4"
|
||||||
regex = "1.11"
|
regex = "1.11"
|
||||||
tokio = { version = "1.45", features = ["full"] }
|
tokio = { version = "1.45", features = ["full"] }
|
||||||
tracing-subscriber = "0.3"
|
tracing-subscriber = "0.3"
|
||||||
url = "2.5"
|
url = "2.5"
|
||||||
urlencoding = "2.1"
|
urlencoding = "2.1"
|
||||||
voca_rs = "1.15"
|
|
||||||
walkdir = "2.5"
|
walkdir = "2.5"
|
||||||
|
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
||||||
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }
|
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }
|
||||||
168
README.md
168
README.md
|
|
@ -4,37 +4,11 @@
|
||||||
[](https://deps.rs/repo/github/YGGverse/aquatic-crawler)
|
[](https://deps.rs/repo/github/YGGverse/aquatic-crawler)
|
||||||
[](https://crates.io/crates/aquatic-crawler)
|
[](https://crates.io/crates/aquatic-crawler)
|
||||||
|
|
||||||
SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker based on [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API
|
SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker, based on the [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> Compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API))
|
> * requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), see the [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic) for more details
|
||||||
|
> * compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API))
|
||||||
## Conception
|
|
||||||
|
|
||||||
See the project [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki)
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> For details on all implemented features, see the [Options](#options) section
|
|
||||||
|
|
||||||
* Info-hash versions
|
|
||||||
* [x] 1
|
|
||||||
* [ ] 2
|
|
||||||
* Import sources
|
|
||||||
* [x] IPv4 / IPv6 info-hash binary API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
|
||||||
* [x] local file path
|
|
||||||
* [ ] remote URL
|
|
||||||
* Export options
|
|
||||||
* [x] Content (`--preload`)
|
|
||||||
* [x] data match the regex pattern (`--preload-regex`)
|
|
||||||
* [x] data match limits (see `--preload-*` options group)
|
|
||||||
* [x] Resolved `.torrent` files (`--export-torrents`)
|
|
||||||
* [x] RSS feed (`--export-rss`) includes resolved torrent meta and magnet links to download
|
|
||||||
* customize feed options with `--export-rss-*` options group
|
|
||||||
* [ ] [Gemtext](https://geminiprotocol.net/docs/gemtext.gmi) static files catalog
|
|
||||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search index
|
|
||||||
* [ ] SQLite database index
|
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
|
|
@ -53,140 +27,12 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
||||||
--infohash /path/to/another-source.bin\
|
--infohash /path/to/another-source.bin\
|
||||||
--tracker udp://host1:port\
|
--tracker udp://host1:port\
|
||||||
--tracker udp://host2:port\
|
--tracker udp://host2:port\
|
||||||
--preload /path/to/directory\
|
--preload /path/to/directory
|
||||||
--enable-tcp
|
|
||||||
```
|
```
|
||||||
|
* append `RUST_LOG=debug` to debug
|
||||||
|
|
||||||
### Options
|
### Options
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
-d, --debug
|
aquatic-crawler --help
|
||||||
Print debug output
|
```
|
||||||
|
|
||||||
--infohash <INFOHASH>
|
|
||||||
Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
|
||||||
* PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
|
||||||
|
|
||||||
--tracker <TRACKER>
|
|
||||||
Define custom tracker(s) to preload the `.torrent` files info
|
|
||||||
|
|
||||||
--initial-peer <INITIAL_PEER>
|
|
||||||
Define initial peer(s) to preload the `.torrent` files info
|
|
||||||
|
|
||||||
--export-torrents <EXPORT_TORRENTS>
|
|
||||||
Save resolved torrent files to given directory
|
|
||||||
|
|
||||||
--export-rss <EXPORT_RSS>
|
|
||||||
File path to export RSS feed
|
|
||||||
|
|
||||||
--export-rss-title <EXPORT_RSS_TITLE>
|
|
||||||
Custom title for RSS feed (channel)
|
|
||||||
|
|
||||||
[default: aquatic-crawler]
|
|
||||||
|
|
||||||
--export-rss-link <EXPORT_RSS_LINK>
|
|
||||||
Custom link for RSS feed (channel)
|
|
||||||
|
|
||||||
--export-rss-description <EXPORT_RSS_DESCRIPTION>
|
|
||||||
Custom description for RSS feed (channel)
|
|
||||||
|
|
||||||
--export-trackers
|
|
||||||
Appends `--tracker` value to magnets and torrents
|
|
||||||
|
|
||||||
--enable-dht
|
|
||||||
Enable DHT resolver
|
|
||||||
|
|
||||||
--enable-tcp
|
|
||||||
Enable TCP connection
|
|
||||||
|
|
||||||
--bind <BIND>
|
|
||||||
Bind resolver session on specified device name (`tun0`, `mycelium`, etc.)
|
|
||||||
|
|
||||||
--listen <LISTEN>
|
|
||||||
Bind listener on specified `host:port` (`[host]:port` for IPv6)
|
|
||||||
|
|
||||||
* this option is useful only for binding the data exchange service,
|
|
||||||
to restrict the outgoing connections for torrent resolver, use `bind` option instead
|
|
||||||
|
|
||||||
--listen-upnp
|
|
||||||
Enable UPnP forwarding
|
|
||||||
|
|
||||||
--enable-upload
|
|
||||||
Enable upload (share bytes received with BitTorrent network)
|
|
||||||
|
|
||||||
--preload <PRELOAD>
|
|
||||||
Directory path to store preloaded data (e.g. `.torrent` files)
|
|
||||||
|
|
||||||
--preload-clear
|
|
||||||
Clear previous data collected on crawl session start
|
|
||||||
|
|
||||||
--preload-regex <PRELOAD_REGEX>
|
|
||||||
Preload only files match regex pattern (list only without preload by default)
|
|
||||||
* see also `preload_max_filesize`, `preload_max_filecount` options
|
|
||||||
|
|
||||||
## Example:
|
|
||||||
|
|
||||||
Filter by image ext ``` --preload-regex '(png|gif|jpeg|jpg|webp)$' ```
|
|
||||||
|
|
||||||
* requires `storage` argument defined
|
|
||||||
|
|
||||||
--preload-total-size <PRELOAD_TOTAL_SIZE>
|
|
||||||
Stop crawler on total preload files size reached
|
|
||||||
|
|
||||||
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
|
|
||||||
Max size sum of preloaded files per torrent (match `preload_regex`)
|
|
||||||
|
|
||||||
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
|
|
||||||
Max count of preloaded files per torrent (match `preload_regex`)
|
|
||||||
|
|
||||||
--proxy-url <PROXY_URL>
|
|
||||||
Use `socks5://[username:password@]host:port`
|
|
||||||
|
|
||||||
--peer-connect-timeout <PEER_CONNECT_TIMEOUT>
|
|
||||||
|
|
||||||
|
|
||||||
--peer-read-write-timeout <PEER_READ_WRITE_TIMEOUT>
|
|
||||||
|
|
||||||
|
|
||||||
--peer-keep-alive-interval <PEER_KEEP_ALIVE_INTERVAL>
|
|
||||||
|
|
||||||
|
|
||||||
--index-capacity <INDEX_CAPACITY>
|
|
||||||
Estimated info-hash index capacity
|
|
||||||
|
|
||||||
[default: 1000]
|
|
||||||
|
|
||||||
--index-list
|
|
||||||
Index torrent files
|
|
||||||
|
|
||||||
--index-list-limit <INDEX_LIST_LIMIT>
|
|
||||||
Limit torrent files quantity to index
|
|
||||||
* insert the `...` placeholder as the last item, with total size left
|
|
||||||
|
|
||||||
[default: 100]
|
|
||||||
|
|
||||||
--index-timeout <INDEX_TIMEOUT>
|
|
||||||
Remove records from index older than `seconds`
|
|
||||||
|
|
||||||
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
|
||||||
Max time to handle each torrent
|
|
||||||
|
|
||||||
[default: 10]
|
|
||||||
|
|
||||||
--sleep <SLEEP>
|
|
||||||
Crawl loop delay in seconds
|
|
||||||
|
|
||||||
[default: 300]
|
|
||||||
|
|
||||||
--upload-limit <UPLOAD_LIMIT>
|
|
||||||
Limit upload speed (b/s)
|
|
||||||
|
|
||||||
--download-limit <DOWNLOAD_LIMIT>
|
|
||||||
Limit download speed (b/s)
|
|
||||||
|
|
||||||
-h, --help
|
|
||||||
Print help (see a summary with '-h')
|
|
||||||
|
|
||||||
-V, --version
|
|
||||||
Print version
|
|
||||||
```
|
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,11 @@
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use regex::Regex;
|
||||||
|
use std::{net::SocketAddr, path::PathBuf};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(version, about, long_about = None)]
|
#[command(version, about, long_about = None)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
/// Print debug output
|
|
||||||
#[arg(short, long, default_value_t = false)]
|
|
||||||
pub debug: bool,
|
|
||||||
|
|
||||||
/// Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
/// Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
||||||
///
|
///
|
||||||
/// * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
/// * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||||
|
|
@ -15,31 +14,11 @@ pub struct Config {
|
||||||
|
|
||||||
/// Define custom tracker(s) to preload the `.torrent` files info
|
/// Define custom tracker(s) to preload the `.torrent` files info
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub tracker: Vec<String>,
|
pub tracker: Vec<Url>,
|
||||||
|
|
||||||
/// Define initial peer(s) to preload the `.torrent` files info
|
/// Define initial peer(s) to preload the `.torrent` files info
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub initial_peer: Vec<String>,
|
pub initial_peer: Option<Vec<SocketAddr>>,
|
||||||
|
|
||||||
/// Save resolved torrent files to given directory
|
|
||||||
#[arg(long)]
|
|
||||||
pub export_torrents: Option<String>,
|
|
||||||
|
|
||||||
/// File path to export RSS feed
|
|
||||||
#[arg(long)]
|
|
||||||
pub export_rss: Option<String>,
|
|
||||||
|
|
||||||
/// Custom title for RSS feed (channel)
|
|
||||||
#[arg(long, default_value_t = String::from("aquatic-crawler"))]
|
|
||||||
pub export_rss_title: String,
|
|
||||||
|
|
||||||
/// Custom link for RSS feed (channel)
|
|
||||||
#[arg(long)]
|
|
||||||
pub export_rss_link: Option<String>,
|
|
||||||
|
|
||||||
/// Custom description for RSS feed (channel)
|
|
||||||
#[arg(long)]
|
|
||||||
pub export_rss_description: Option<String>,
|
|
||||||
|
|
||||||
/// Appends `--tracker` value to magnets and torrents
|
/// Appends `--tracker` value to magnets and torrents
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
|
|
@ -49,9 +28,9 @@ pub struct Config {
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub enable_dht: bool,
|
pub enable_dht: bool,
|
||||||
|
|
||||||
/// Enable TCP connection
|
/// Disable TCP connection
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub enable_tcp: bool,
|
pub disable_tcp: bool,
|
||||||
|
|
||||||
/// Bind resolver session on specified device name (`tun0`, `mycelium`, etc.)
|
/// Bind resolver session on specified device name (`tun0`, `mycelium`, etc.)
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
|
|
@ -74,11 +53,7 @@ pub struct Config {
|
||||||
|
|
||||||
/// Directory path to store preloaded data (e.g. `.torrent` files)
|
/// Directory path to store preloaded data (e.g. `.torrent` files)
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub preload: Option<String>,
|
pub preload: PathBuf,
|
||||||
|
|
||||||
/// Clear previous data collected on crawl session start
|
|
||||||
#[arg(long, default_value_t = false)]
|
|
||||||
pub preload_clear: bool,
|
|
||||||
|
|
||||||
/// Preload only files match regex pattern (list only without preload by default)
|
/// Preload only files match regex pattern (list only without preload by default)
|
||||||
/// * see also `preload_max_filesize`, `preload_max_filecount` options
|
/// * see also `preload_max_filesize`, `preload_max_filecount` options
|
||||||
|
|
@ -92,11 +67,7 @@ pub struct Config {
|
||||||
///
|
///
|
||||||
/// * requires `storage` argument defined
|
/// * requires `storage` argument defined
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub preload_regex: Option<String>,
|
pub preload_regex: Option<Regex>,
|
||||||
|
|
||||||
/// Stop crawler on total preload files size reached
|
|
||||||
#[arg(long)]
|
|
||||||
pub preload_total_size: Option<u64>,
|
|
||||||
|
|
||||||
/// Max size sum of preloaded files per torrent (match `preload_regex`)
|
/// Max size sum of preloaded files per torrent (match `preload_regex`)
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
|
|
@ -108,7 +79,7 @@ pub struct Config {
|
||||||
|
|
||||||
/// Use `socks5://[username:password@]host:port`
|
/// Use `socks5://[username:password@]host:port`
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub proxy_url: Option<String>,
|
pub proxy_url: Option<Url>,
|
||||||
|
|
||||||
// Peer options
|
// Peer options
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
|
|
|
||||||
|
|
@ -1,24 +0,0 @@
|
||||||
pub trait Format {
|
|
||||||
/// Format bytes to KB/MB/GB presentation
|
|
||||||
fn bytes(self) -> String;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Format for u64 {
|
|
||||||
fn bytes(self) -> String {
|
|
||||||
const KB: f32 = 1024.0;
|
|
||||||
const MB: f32 = KB * KB;
|
|
||||||
const GB: f32 = MB * KB;
|
|
||||||
|
|
||||||
let f = self as f32;
|
|
||||||
|
|
||||||
if f < KB {
|
|
||||||
format!("{self} B")
|
|
||||||
} else if f < MB {
|
|
||||||
format!("{:.2} KB", f / KB)
|
|
||||||
} else if f < GB {
|
|
||||||
format!("{:.2} MB", f / MB)
|
|
||||||
} else {
|
|
||||||
format!("{:.2} GB", f / GB)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
111
src/index.rs
111
src/index.rs
|
|
@ -1,111 +0,0 @@
|
||||||
mod value;
|
|
||||||
|
|
||||||
use chrono::{Duration, Utc};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use value::Value;
|
|
||||||
|
|
||||||
/// Collect processed info hashes to skip on the next iterations (for this session)
|
|
||||||
/// * also contains optional meta info to export index as RSS or any other format
|
|
||||||
pub struct Index {
|
|
||||||
index: HashMap<String, Value>,
|
|
||||||
/// Removes outdated values from `index` on `Self::refresh` action
|
|
||||||
timeout: Option<Duration>,
|
|
||||||
/// Track index changes to prevent extra disk write operations (safe SSD life)
|
|
||||||
/// * useful in the static RSS feed generation case, if enabled
|
|
||||||
is_changed: bool,
|
|
||||||
/// Store the index value in memory only when it is in use by the init options
|
|
||||||
has_name: bool,
|
|
||||||
has_size: bool,
|
|
||||||
has_list: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Index {
|
|
||||||
pub fn init(
|
|
||||||
capacity: usize,
|
|
||||||
timeout: Option<i64>,
|
|
||||||
has_name: bool,
|
|
||||||
has_size: bool,
|
|
||||||
has_list: bool,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
index: HashMap::with_capacity(capacity),
|
|
||||||
timeout: timeout.map(Duration::seconds),
|
|
||||||
has_size,
|
|
||||||
has_name,
|
|
||||||
has_list,
|
|
||||||
is_changed: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn has(&self, infohash: &str) -> bool {
|
|
||||||
self.index.contains_key(infohash)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_changed(&self) -> bool {
|
|
||||||
self.is_changed
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn list(&self) -> &HashMap<String, Value> {
|
|
||||||
&self.index
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn len(&self) -> usize {
|
|
||||||
self.index.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn nodes(&self) -> u64 {
|
|
||||||
self.index.values().map(|i| i.node).sum::<u64>()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert(
|
|
||||||
&mut self,
|
|
||||||
infohash: String,
|
|
||||||
node: u64,
|
|
||||||
size: u64,
|
|
||||||
list: Option<Vec<(Option<String>, u64)>>,
|
|
||||||
name: Option<String>,
|
|
||||||
) {
|
|
||||||
if self
|
|
||||||
.index
|
|
||||||
.insert(
|
|
||||||
infohash,
|
|
||||||
Value::new(
|
|
||||||
node,
|
|
||||||
if self.has_size { Some(size) } else { None },
|
|
||||||
if self.has_name { name } else { None },
|
|
||||||
if self.has_list { list } else { None },
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.is_none()
|
|
||||||
{
|
|
||||||
self.is_changed = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn refresh(&mut self) {
|
|
||||||
if let Some(timeout) = self.timeout {
|
|
||||||
let t = Utc::now();
|
|
||||||
self.index.retain(|_, v| t - v.time <= timeout)
|
|
||||||
}
|
|
||||||
self.is_changed = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test() {
|
|
||||||
use std::{thread::sleep, time::Duration};
|
|
||||||
|
|
||||||
// test values auto-clean by timeout
|
|
||||||
let mut i = Index::init(2, Some(3), false, false, false);
|
|
||||||
|
|
||||||
i.insert("h1".to_string(), 0, 0, None, None);
|
|
||||||
sleep(Duration::from_secs(1));
|
|
||||||
i.insert("h2".to_string(), 0, 0, None, None);
|
|
||||||
|
|
||||||
i.refresh();
|
|
||||||
assert_eq!(i.len(), 2);
|
|
||||||
|
|
||||||
sleep(Duration::from_secs(2));
|
|
||||||
i.refresh();
|
|
||||||
assert_eq!(i.len(), 1)
|
|
||||||
}
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
use chrono::{DateTime, Utc};
|
|
||||||
use voca_rs::Voca;
|
|
||||||
|
|
||||||
/// The `Index` value
|
|
||||||
pub struct Value {
|
|
||||||
pub time: DateTime<Utc>,
|
|
||||||
pub node: u64,
|
|
||||||
// Isolate by applying internal filter on value set
|
|
||||||
size: Option<u64>,
|
|
||||||
name: Option<String>,
|
|
||||||
list: Option<Vec<(Option<String>, u64)>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Value {
|
|
||||||
/// Create new `Self` with current timestamp
|
|
||||||
pub fn new(
|
|
||||||
node: u64,
|
|
||||||
size: Option<u64>,
|
|
||||||
name: Option<String>,
|
|
||||||
list: Option<Vec<(Option<String>, u64)>>,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
time: Utc::now(),
|
|
||||||
node,
|
|
||||||
size,
|
|
||||||
list: list.map(|f| f.into_iter().map(|(n, l)| (filter(n), l)).collect()),
|
|
||||||
name: filter(name),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Get reference to the safely constructed `name` member
|
|
||||||
pub fn name(&self) -> Option<&String> {
|
|
||||||
self.name.as_ref()
|
|
||||||
}
|
|
||||||
/// Get reference to the safely constructed files `list` member
|
|
||||||
pub fn list(&self) -> Option<&Vec<(Option<String>, u64)>> {
|
|
||||||
self.list.as_ref()
|
|
||||||
}
|
|
||||||
/// Get reference to the safely constructed `length` member
|
|
||||||
pub fn size(&self) -> Option<u64> {
|
|
||||||
self.size
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Strip tags and bom chars, crop long strings (prevents memory pool overload)
|
|
||||||
fn filter(value: Option<String>) -> Option<String> {
|
|
||||||
value.map(|v| {
|
|
||||||
const C: usize = 125; // + 3 for `...` offset, 128 chars max @TODO optional
|
|
||||||
let s = v._strip_bom()._strip_tags();
|
|
||||||
if s.chars().count() > C {
|
|
||||||
return format!("{}...", s.chars().take(C).collect::<String>());
|
|
||||||
}
|
|
||||||
s
|
|
||||||
})
|
|
||||||
}
|
|
||||||
351
src/main.rs
351
src/main.rs
|
|
@ -1,25 +1,15 @@
|
||||||
mod api;
|
mod api;
|
||||||
mod config;
|
mod config;
|
||||||
mod format;
|
|
||||||
mod index;
|
|
||||||
mod peers;
|
|
||||||
mod preload;
|
mod preload;
|
||||||
mod rss;
|
|
||||||
mod torrent;
|
|
||||||
mod trackers;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use config::Config;
|
use config::Config;
|
||||||
use index::Index;
|
|
||||||
use librqbit::{
|
use librqbit::{
|
||||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions,
|
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, ListenerOptions,
|
||||||
ListenerOptions, PeerConnectionOptions, SessionOptions, ValidatedTorrentMetaV1Info,
|
PeerConnectionOptions, SessionOptions,
|
||||||
};
|
};
|
||||||
use peers::Peers;
|
use preload::Preload;
|
||||||
use rss::Rss;
|
use std::{collections::HashSet, num::NonZero, str::FromStr, time::Duration};
|
||||||
use std::{collections::HashSet, num::NonZero, path::PathBuf, str::FromStr, time::Duration};
|
|
||||||
use torrent::Torrent;
|
|
||||||
use trackers::Trackers;
|
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|
@ -27,29 +17,21 @@ async fn main() -> Result<()> {
|
||||||
use chrono::Local;
|
use chrono::Local;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use tokio::time;
|
use tokio::time;
|
||||||
|
// init debug
|
||||||
|
if std::env::var("RUST_LOG").is_ok() {
|
||||||
|
tracing_subscriber::fmt::init()
|
||||||
|
} // librqbit
|
||||||
// init components
|
// init components
|
||||||
let time_init = Local::now();
|
let time_init = Local::now();
|
||||||
let config = Config::parse();
|
let config = Config::parse();
|
||||||
if std::env::var("RUST_LOG").is_ok() {
|
let preload = Preload::init(
|
||||||
tracing_subscriber::fmt::init()
|
|
||||||
}
|
|
||||||
let peers = Peers::init(&config.initial_peer)?;
|
|
||||||
let preload = preload::init(
|
|
||||||
config.preload,
|
config.preload,
|
||||||
config.preload_regex,
|
config.preload_regex,
|
||||||
config.preload_max_filecount,
|
config.preload_max_filecount,
|
||||||
config.preload_max_filesize,
|
config.preload_max_filesize,
|
||||||
config.preload_total_size,
|
|
||||||
config.preload_clear,
|
|
||||||
)?;
|
)?;
|
||||||
let trackers = Trackers::init(&config.tracker)?;
|
|
||||||
let torrent = config.export_torrents.map(|p| Torrent::init(&p).unwrap());
|
|
||||||
let session = librqbit::Session::new_with_opts(
|
let session = librqbit::Session::new_with_opts(
|
||||||
match preload {
|
preload.root().clone(),
|
||||||
Some(ref p) => p.path(),
|
|
||||||
None => PathBuf::new(),
|
|
||||||
},
|
|
||||||
SessionOptions {
|
SessionOptions {
|
||||||
bind_device_name: config.bind,
|
bind_device_name: config.bind,
|
||||||
listen: match config.listen {
|
listen: match config.listen {
|
||||||
|
|
@ -66,8 +48,8 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
connect: Some(ConnectionOptions {
|
connect: Some(ConnectionOptions {
|
||||||
enable_tcp: config.enable_tcp,
|
enable_tcp: !config.disable_tcp,
|
||||||
proxy_url: config.proxy_url,
|
proxy_url: config.proxy_url.map(|u| u.to_string()),
|
||||||
peer_opts: Some(PeerConnectionOptions {
|
peer_opts: Some(PeerConnectionOptions {
|
||||||
connect_timeout: config.peer_connect_timeout.map(Duration::from_secs),
|
connect_timeout: config.peer_connect_timeout.map(Duration::from_secs),
|
||||||
read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs),
|
read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs),
|
||||||
|
|
@ -82,54 +64,35 @@ async fn main() -> Result<()> {
|
||||||
upload_bps: config.upload_limit.and_then(NonZero::new),
|
upload_bps: config.upload_limit.and_then(NonZero::new),
|
||||||
download_bps: config.download_limit.and_then(NonZero::new),
|
download_bps: config.download_limit.and_then(NonZero::new),
|
||||||
},
|
},
|
||||||
trackers: trackers.list().clone(),
|
trackers: config.tracker.iter().cloned().collect(),
|
||||||
..SessionOptions::default()
|
..SessionOptions::default()
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
log::info!("Crawler started on {time_init}");
|
||||||
// begin
|
|
||||||
println!("Crawler started on {time_init}");
|
|
||||||
let mut index = Index::init(
|
|
||||||
config.index_capacity,
|
|
||||||
config.index_timeout,
|
|
||||||
config.export_rss.is_some(),
|
|
||||||
config.export_rss.is_some(),
|
|
||||||
config.export_rss.is_some() && config.index_list,
|
|
||||||
);
|
|
||||||
loop {
|
loop {
|
||||||
let time_queue = Local::now();
|
let time_queue = Local::now();
|
||||||
if config.debug {
|
log::debug!("Queue crawl begin on {time_queue}...");
|
||||||
println!("\tQueue crawl begin on {time_queue}...")
|
|
||||||
}
|
|
||||||
index.refresh();
|
|
||||||
for source in &config.infohash {
|
for source in &config.infohash {
|
||||||
if config.debug {
|
log::debug!("Index source `{source}`...");
|
||||||
println!("\tIndex source `{source}`...")
|
|
||||||
}
|
|
||||||
// grab latest info-hashes from this source
|
// grab latest info-hashes from this source
|
||||||
// * aquatic server may update the stats at this moment, handle result manually
|
// * aquatic server may update the stats at this moment, handle result manually
|
||||||
for i in match api::get(source, config.index_capacity) {
|
for i in match api::get(source, config.index_capacity) {
|
||||||
Some(i) => i,
|
Some(i) => i,
|
||||||
None => {
|
None => {
|
||||||
// skip without panic
|
// skip without panic
|
||||||
if config.debug {
|
log::warn!(
|
||||||
eprintln!(
|
"The feed `{source}` has an incomplete format (or is still updating); skip."
|
||||||
"The feed `{source}` has an incomplete format (or is still updating); skip."
|
);
|
||||||
)
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} {
|
} {
|
||||||
// convert to string once
|
// convert to string once
|
||||||
let i = i.to_string();
|
let i = i.to_string();
|
||||||
// is already indexed?
|
if preload.contains_torrent(&i)? {
|
||||||
if index.has(&i) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if config.debug {
|
log::debug!("Index `{i}`...");
|
||||||
println!("\t\tIndex `{i}`...")
|
|
||||||
}
|
|
||||||
// run the crawler in single thread for performance reasons,
|
// run the crawler in single thread for performance reasons,
|
||||||
// use `timeout` argument option to skip the dead connections.
|
// use `timeout` argument option to skip the dead connections.
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
|
|
@ -137,18 +100,18 @@ async fn main() -> Result<()> {
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
AddTorrent::from_url(magnet(
|
AddTorrent::from_url(magnet(
|
||||||
&i,
|
&i,
|
||||||
if config.export_trackers && !trackers.is_empty() {
|
if config.tracker.is_empty() {
|
||||||
Some(trackers.list())
|
|
||||||
} else {
|
|
||||||
None
|
None
|
||||||
|
} else {
|
||||||
|
Some(config.tracker.as_ref())
|
||||||
},
|
},
|
||||||
)),
|
)),
|
||||||
Some(AddTorrentOptions {
|
Some(AddTorrentOptions {
|
||||||
paused: true, // continue after `only_files` init
|
paused: true, // continue after `only_files` init
|
||||||
overwrite: true,
|
overwrite: true,
|
||||||
disable_trackers: trackers.is_empty(),
|
disable_trackers: config.tracker.is_empty(),
|
||||||
initial_peers: peers.initial_peers(),
|
initial_peers: config.initial_peer.clone(),
|
||||||
list_only: preload.as_ref().is_none_or(|p| p.regex.is_none()),
|
list_only: false,
|
||||||
// it is important to blacklist all files preload until initiation
|
// it is important to blacklist all files preload until initiation
|
||||||
only_files: Some(Vec::with_capacity(
|
only_files: Some(Vec::with_capacity(
|
||||||
config.preload_max_filecount.unwrap_or_default(),
|
config.preload_max_filecount.unwrap_or_default(),
|
||||||
|
|
@ -156,8 +119,9 @@ async fn main() -> Result<()> {
|
||||||
// the destination folder to preload files match `only_files_regex`
|
// the destination folder to preload files match `only_files_regex`
|
||||||
// * e.g. images for audio albums
|
// * e.g. images for audio albums
|
||||||
output_folder: preload
|
output_folder: preload
|
||||||
.as_ref()
|
.output_folder(&i)?
|
||||||
.map(|p| p.output_folder(&i, true).unwrap()),
|
.to_str()
|
||||||
|
.map(|s| s.to_string()),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
|
|
@ -167,183 +131,79 @@ async fn main() -> Result<()> {
|
||||||
Ok(r) => match r {
|
Ok(r) => match r {
|
||||||
// on `preload_regex` case only
|
// on `preload_regex` case only
|
||||||
Ok(AddTorrentResponse::Added(id, mt)) => {
|
Ok(AddTorrentResponse::Added(id, mt)) => {
|
||||||
let mut only_files_size = 0;
|
let mut keep_files = HashSet::with_capacity(
|
||||||
let mut only_files_keep = Vec::with_capacity(
|
|
||||||
config.preload_max_filecount.unwrap_or_default(),
|
config.preload_max_filecount.unwrap_or_default(),
|
||||||
);
|
);
|
||||||
let mut only_files = HashSet::with_capacity(
|
let mut only_files = HashSet::with_capacity(
|
||||||
config.preload_max_filecount.unwrap_or_default(),
|
config.preload_max_filecount.unwrap_or_default(),
|
||||||
);
|
);
|
||||||
mt.wait_until_initialized().await?;
|
mt.wait_until_initialized().await?;
|
||||||
let (name, size, list) = mt.with_metadata(|m| {
|
let bytes = mt.with_metadata(|m| {
|
||||||
// init preload files list
|
for (id, info) in m.file_infos.iter().enumerate() {
|
||||||
if let Some(ref p) = preload {
|
if preload
|
||||||
for (id, info) in m.file_infos.iter().enumerate() {
|
.max_filecount
|
||||||
if p.matches(info.relative_filename.to_str().unwrap()) {
|
.is_some_and(|limit| only_files.len() + 1 > limit)
|
||||||
if p.max_filesize.is_some_and(|limit| {
|
{
|
||||||
only_files_size + info.len > limit
|
log::debug!(
|
||||||
}) {
|
"file count limit reached, skip `{id}` for `{i}`"
|
||||||
if config.debug {
|
);
|
||||||
println!(
|
break;
|
||||||
"\t\t\ttotal files size limit `{i}` reached!"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if p.max_filecount
|
|
||||||
.is_some_and(|limit| only_files.len() + 1 > limit)
|
|
||||||
{
|
|
||||||
if config.debug {
|
|
||||||
println!(
|
|
||||||
"\t\t\ttotal files count limit for `{i}` reached!"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
only_files_size += info.len;
|
|
||||||
if let Some(ref p) = preload {
|
|
||||||
only_files_keep
|
|
||||||
.push(p.absolute(&i, &info.relative_filename))
|
|
||||||
}
|
|
||||||
only_files.insert(id);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if preload.max_filesize.is_some_and(|limit| info.len > limit) {
|
||||||
|
log::debug!(
|
||||||
|
"file size limit reached, skip `{id}` for `{i}`"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if preload.regex.as_ref().is_some_and(|r| {
|
||||||
|
!r.is_match(&info.relative_filename.to_string_lossy())
|
||||||
|
}) {
|
||||||
|
log::debug!("regex filter, skip `{id}` for `{i}`");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert!(keep_files.insert(info.relative_filename.clone()));
|
||||||
|
assert!(only_files.insert(id));
|
||||||
}
|
}
|
||||||
if let Some(ref t) = torrent {
|
m.torrent_bytes.to_vec()
|
||||||
save_torrent_file(t, &i, &m.torrent_bytes, config.debug)
|
|
||||||
}
|
|
||||||
|
|
||||||
(
|
|
||||||
m.info.name().as_ref().map(|n| n.to_string()),
|
|
||||||
size(&m.info),
|
|
||||||
list(&m.info, config.index_list_limit),
|
|
||||||
)
|
|
||||||
})?;
|
})?;
|
||||||
session.update_only_files(&mt, &only_files).await?;
|
session.update_only_files(&mt, &only_files).await?;
|
||||||
session.unpause(&mt).await?;
|
session.unpause(&mt).await?;
|
||||||
// await for `preload_regex` files download to continue
|
// await for `preload_regex` files download to continue
|
||||||
mt.wait_until_completed().await?;
|
mt.wait_until_completed().await?;
|
||||||
|
// cleanup irrelevant files (see rqbit#408)
|
||||||
|
preload.cleanup(&i, Some(keep_files))?;
|
||||||
|
preload.persist_torrent_bytes(&i, &bytes)?;
|
||||||
// remove torrent from session as indexed
|
// remove torrent from session as indexed
|
||||||
session
|
session
|
||||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||||
.await?;
|
.await?;
|
||||||
// cleanup irrelevant files (see rqbit#408)
|
log::debug!("torrent `{i}` indexed.")
|
||||||
if let Some(p) = &preload {
|
|
||||||
p.cleanup(&i, Some(only_files_keep))?
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.debug {
|
|
||||||
println!("\t\t\tadd `{i}` to index.")
|
|
||||||
}
|
|
||||||
|
|
||||||
index.insert(
|
|
||||||
i,
|
|
||||||
only_files_size,
|
|
||||||
size,
|
|
||||||
list,
|
|
||||||
name.map(|n| n.to_string()),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
Ok(AddTorrentResponse::ListOnly(r)) => {
|
Ok(_) => panic!(),
|
||||||
if let Some(ref t) = torrent {
|
Err(e) => log::debug!("Failed to resolve `{i}`: `{e}`."),
|
||||||
save_torrent_file(t, &i, &r.torrent_bytes, config.debug)
|
|
||||||
}
|
|
||||||
|
|
||||||
// @TODO
|
|
||||||
// use `r.info` for Memory, SQLite,
|
|
||||||
// Manticore and other alternative storage type
|
|
||||||
|
|
||||||
if config.debug {
|
|
||||||
println!("\t\t\tadd `{i}` to index.")
|
|
||||||
}
|
|
||||||
|
|
||||||
index.insert(
|
|
||||||
i,
|
|
||||||
0,
|
|
||||||
size(&r.info),
|
|
||||||
list(&r.info, config.index_list_limit),
|
|
||||||
r.info.name().map(|n| n.to_string()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
// unexpected as should be deleted
|
|
||||||
Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
|
|
||||||
Err(e) => eprintln!("Failed to resolve `{i}`: `{e}`."),
|
|
||||||
},
|
},
|
||||||
Err(e) => {
|
Err(e) => log::debug!("failed to resolve `{i}`: `{e}`"),
|
||||||
if config.debug {
|
|
||||||
println!("\t\t\tfailed to resolve `{i}`: `{e}`")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
log::debug!(
|
||||||
if let Some(ref export_rss) = config.export_rss
|
"Queue completed at {time_queue} (time: {} / uptime: {}) await {} seconds to continue...",
|
||||||
&& index.is_changed()
|
Local::now()
|
||||||
{
|
.signed_duration_since(time_queue)
|
||||||
let mut rss = Rss::new(
|
.as_seconds_f32(),
|
||||||
export_rss,
|
Local::now()
|
||||||
&config.export_rss_title,
|
.signed_duration_since(time_init)
|
||||||
&config.export_rss_link,
|
.as_seconds_f32(),
|
||||||
&config.export_rss_description,
|
config.sleep,
|
||||||
if config.export_trackers && !trackers.is_empty() {
|
);
|
||||||
Some(trackers.list().clone())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
},
|
|
||||||
)?;
|
|
||||||
for (k, v) in index.list() {
|
|
||||||
rss.push(
|
|
||||||
k,
|
|
||||||
v.name().unwrap_or(k),
|
|
||||||
rss::item_description(v.size(), v.list()),
|
|
||||||
Some(&v.time.to_rfc2822()),
|
|
||||||
)?
|
|
||||||
}
|
|
||||||
rss.commit()?
|
|
||||||
}
|
|
||||||
if preload
|
|
||||||
.as_ref()
|
|
||||||
.is_some_and(|p| p.total_size.is_some_and(|s| index.nodes() > s))
|
|
||||||
{
|
|
||||||
panic!("Preload content size {} bytes reached!", 0)
|
|
||||||
}
|
|
||||||
if config.debug {
|
|
||||||
println!(
|
|
||||||
"Queue completed on {time_queue}\n\ttotal: {}\n\ttime: {} s\n\tuptime: {} s\n\tawait {} seconds to continue...",
|
|
||||||
index.len(),
|
|
||||||
Local::now()
|
|
||||||
.signed_duration_since(time_queue)
|
|
||||||
.as_seconds_f32(),
|
|
||||||
Local::now()
|
|
||||||
.signed_duration_since(time_init)
|
|
||||||
.as_seconds_f32(),
|
|
||||||
config.sleep,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
std::thread::sleep(Duration::from_secs(config.sleep))
|
std::thread::sleep(Duration::from_secs(config.sleep))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shared handler function to save resolved torrents as file
|
|
||||||
fn save_torrent_file(t: &Torrent, i: &str, b: &[u8], d: bool) {
|
|
||||||
match t.persist(i, b) {
|
|
||||||
Ok(r) => {
|
|
||||||
if d {
|
|
||||||
match r {
|
|
||||||
Some(p) => println!("\t\t\tadd torrent file `{}`", p.to_string_lossy()),
|
|
||||||
None => println!("\t\t\ttorrent file `{i}` already exists"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => eprintln!("Error on save torrent file `{i}`: `{e}`"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build magnet URI
|
/// Build magnet URI
|
||||||
fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
fn magnet(info_hash: &str, trackers: Option<&Vec<Url>>) -> String {
|
||||||
let mut m = if infohash.len() == 40 {
|
let mut m = if info_hash.len() == 40 {
|
||||||
format!("magnet:?xt=urn:btih:{infohash}")
|
format!("magnet:?xt=urn:btih:{info_hash}")
|
||||||
} else {
|
} else {
|
||||||
todo!("infohash v2 is not supported by librqbit")
|
todo!("infohash v2 is not supported by librqbit")
|
||||||
};
|
};
|
||||||
|
|
@ -355,62 +215,3 @@ fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
||||||
}
|
}
|
||||||
m
|
m
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Count total size, including torrent files
|
|
||||||
fn size(meta: &ValidatedTorrentMetaV1Info<ByteBufOwned>) -> u64 {
|
|
||||||
let mut t = 0;
|
|
||||||
if let Some(l) = meta.info().length {
|
|
||||||
t += l
|
|
||||||
}
|
|
||||||
if let Some(ref files) = meta.info().files {
|
|
||||||
for f in files {
|
|
||||||
t += f.length
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list(
|
|
||||||
meta: &ValidatedTorrentMetaV1Info<ByteBufOwned>,
|
|
||||||
limit: usize,
|
|
||||||
) -> Option<Vec<(Option<String>, u64)>> {
|
|
||||||
meta.info().files.as_ref().map(|files| {
|
|
||||||
let mut b = Vec::with_capacity(files.len());
|
|
||||||
let mut i = files.iter();
|
|
||||||
let mut t = 0;
|
|
||||||
for f in i.by_ref() {
|
|
||||||
if t < limit {
|
|
||||||
t += 1;
|
|
||||||
b.push((
|
|
||||||
String::from_utf8(
|
|
||||||
f.path
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.flat_map(|(n, b)| {
|
|
||||||
if n == 0 {
|
|
||||||
b.0.to_vec()
|
|
||||||
} else {
|
|
||||||
let mut p = vec![b'/'];
|
|
||||||
p.extend(b.0.to_vec());
|
|
||||||
p
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect(),
|
|
||||||
)
|
|
||||||
.ok(),
|
|
||||||
f.length,
|
|
||||||
));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// limit reached: count sizes left and use placeholder as the last item name
|
|
||||||
let mut l = 0;
|
|
||||||
for f in i.by_ref() {
|
|
||||||
l += f.length
|
|
||||||
}
|
|
||||||
b.push((Some("...".to_string()), l));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
b[..t].sort_by(|a, b| a.0.cmp(&b.0)); // @TODO optional
|
|
||||||
b
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
|
||||||
21
src/peers.rs
21
src/peers.rs
|
|
@ -1,21 +0,0 @@
|
||||||
use std::{net::SocketAddr, str::FromStr};
|
|
||||||
|
|
||||||
pub struct Peers(Vec<SocketAddr>);
|
|
||||||
|
|
||||||
impl Peers {
|
|
||||||
pub fn init(peers: &Vec<String>) -> anyhow::Result<Self> {
|
|
||||||
let mut p = Vec::with_capacity(peers.len());
|
|
||||||
for peer in peers {
|
|
||||||
p.push(SocketAddr::from_str(peer)?);
|
|
||||||
}
|
|
||||||
Ok(Self(p))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn initial_peers(&self) -> Option<Vec<SocketAddr>> {
|
|
||||||
if self.0.is_empty() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(self.0.clone())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
143
src/preload.rs
143
src/preload.rs
|
|
@ -1,123 +1,90 @@
|
||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::{fs, path::PathBuf, str::FromStr};
|
use std::{collections::HashSet, fs, path::PathBuf};
|
||||||
|
|
||||||
pub struct Preload {
|
pub struct Preload {
|
||||||
path: PathBuf,
|
root: PathBuf,
|
||||||
pub max_filecount: Option<usize>,
|
pub max_filecount: Option<usize>,
|
||||||
pub max_filesize: Option<u64>,
|
pub max_filesize: Option<u64>,
|
||||||
pub total_size: Option<u64>,
|
|
||||||
pub regex: Option<Regex>,
|
pub regex: Option<Regex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Preload {
|
impl Preload {
|
||||||
fn init(
|
// Constructors
|
||||||
directory: &str,
|
|
||||||
regex: Option<String>,
|
pub fn init(
|
||||||
|
root: PathBuf,
|
||||||
|
regex: Option<Regex>,
|
||||||
max_filecount: Option<usize>,
|
max_filecount: Option<usize>,
|
||||||
max_filesize: Option<u64>,
|
max_filesize: Option<u64>,
|
||||||
total_size: Option<u64>,
|
|
||||||
is_clear: bool,
|
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let path = PathBuf::from_str(directory)?;
|
if !root.is_dir() {
|
||||||
if let Ok(t) = fs::metadata(&path) {
|
bail!("Preload root is not directory")
|
||||||
if t.is_file() {
|
|
||||||
bail!("Storage destination is not directory!");
|
|
||||||
}
|
|
||||||
if t.is_dir() && is_clear {
|
|
||||||
for i in fs::read_dir(&path)? {
|
|
||||||
let r = i?.path();
|
|
||||||
if r.is_dir() {
|
|
||||||
fs::remove_dir_all(&r)?;
|
|
||||||
} else {
|
|
||||||
fs::remove_file(&r)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
fs::create_dir_all(&path)?;
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
max_filecount,
|
max_filecount,
|
||||||
max_filesize,
|
max_filesize,
|
||||||
path,
|
regex,
|
||||||
regex: regex.map(|r| Regex::new(&r).unwrap()),
|
root: root.canonicalize()?,
|
||||||
total_size,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn output_folder(&self, infohash: &str, create: bool) -> Result<String> {
|
// Actions
|
||||||
let mut p = PathBuf::new();
|
|
||||||
p.push(&self.path);
|
|
||||||
p.push(infohash);
|
|
||||||
if p.is_file() {
|
|
||||||
bail!("File destination is not directory!");
|
|
||||||
}
|
|
||||||
if create {
|
|
||||||
fs::create_dir_all(&p)?;
|
|
||||||
}
|
|
||||||
if !p.is_dir() {
|
|
||||||
bail!("Destination directory not exists!")
|
|
||||||
}
|
|
||||||
Ok(p.to_string_lossy().to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
|
|
||||||
let mut p = PathBuf::new();
|
|
||||||
p.push(&self.path);
|
|
||||||
p.push(infohash);
|
|
||||||
p.push(file);
|
|
||||||
p
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
||||||
pub fn cleanup(&self, infohash: &str, keep_filenames: Option<Vec<PathBuf>>) -> Result<()> {
|
pub fn cleanup(&self, info_hash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
|
||||||
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
|
for e in walkdir::WalkDir::new(self.output_folder(info_hash)?) {
|
||||||
let e = e?;
|
let e = e?;
|
||||||
let p = e.into_path();
|
let p = e.into_path();
|
||||||
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
|
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
|
||||||
fs::remove_file(p)?;
|
fs::remove_file(p)?;
|
||||||
}
|
}
|
||||||
}
|
} // remove empty directories @TODO
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn path(&self) -> PathBuf {
|
pub fn persist_torrent_bytes(&self, info_hash: &str, contents: &[u8]) -> Result<PathBuf> {
|
||||||
self.path.clone()
|
let p = self.torrent(info_hash)?;
|
||||||
|
fs::write(&p, contents)?;
|
||||||
|
Ok(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn matches(&self, pattern: &str) -> bool {
|
// Getters
|
||||||
self.regex.as_ref().is_some_and(|r| r.is_match(pattern))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Init `Preload` with validate related argument options
|
/// * creates new directory if not exists
|
||||||
pub fn init(
|
pub fn output_folder(&self, info_hash: &str) -> Result<PathBuf> {
|
||||||
path: Option<String>,
|
if !is_info_hash(info_hash) {
|
||||||
regex: Option<String>,
|
bail!("Invalid info-hash `{info_hash}`")
|
||||||
max_filecount: Option<usize>,
|
|
||||||
max_filesize: Option<u64>,
|
|
||||||
total_size: Option<u64>,
|
|
||||||
is_clear: bool,
|
|
||||||
) -> Result<Option<Preload>> {
|
|
||||||
Ok(match path {
|
|
||||||
Some(ref p) => Some(Preload::init(
|
|
||||||
p,
|
|
||||||
regex,
|
|
||||||
max_filecount,
|
|
||||||
max_filesize,
|
|
||||||
total_size,
|
|
||||||
is_clear,
|
|
||||||
)?),
|
|
||||||
None => {
|
|
||||||
if regex.is_some()
|
|
||||||
|| max_filecount.is_some()
|
|
||||||
|| max_filesize.is_some()
|
|
||||||
|| total_size.is_some()
|
|
||||||
|| is_clear
|
|
||||||
{
|
|
||||||
bail!("`--preload` directory is required for this configuration!")
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
})
|
let mut p = PathBuf::from(&self.root);
|
||||||
|
p.push(info_hash);
|
||||||
|
if p.is_file() {
|
||||||
|
bail!("Output directory for info-hash `{info_hash}` is file")
|
||||||
|
}
|
||||||
|
if !p.exists() {
|
||||||
|
fs::create_dir(&p)?
|
||||||
|
}
|
||||||
|
Ok(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn root(&self) -> &PathBuf {
|
||||||
|
&self.root
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
|
||||||
|
Ok(fs::exists(self.torrent(info_hash)?)?)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn torrent(&self, info_hash: &str) -> Result<PathBuf> {
|
||||||
|
if !is_info_hash(info_hash) {
|
||||||
|
bail!("Invalid info-hash `{info_hash}`")
|
||||||
|
}
|
||||||
|
let mut p = PathBuf::from(&self.root);
|
||||||
|
p.push(format!("{info_hash}.torrent"));
|
||||||
|
Ok(p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_info_hash(value: &str) -> bool {
|
||||||
|
value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
142
src/rss.rs
142
src/rss.rs
|
|
@ -1,142 +0,0 @@
|
||||||
use anyhow::{Result, bail};
|
|
||||||
use std::{collections::HashSet, fs::File, io::Write, path::PathBuf, str::FromStr};
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
/// Export crawl index to the RSS file
|
|
||||||
pub struct Rss {
|
|
||||||
/// Resulting (public) file in the XML format
|
|
||||||
file: File,
|
|
||||||
/// Shared directory for the feed `file` and its `tmp` buffer file
|
|
||||||
target: PathBuf,
|
|
||||||
/// Creates temporary file to exclude feed format damage on update
|
|
||||||
tmp: PathBuf,
|
|
||||||
/// Trackers source for every item in channel
|
|
||||||
trackers: Option<HashSet<Url>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Rss {
|
|
||||||
/// Create writable file for given `filepath`
|
|
||||||
pub fn new(
|
|
||||||
filepath: &str,
|
|
||||||
title: &str,
|
|
||||||
link: &Option<String>,
|
|
||||||
description: &Option<String>,
|
|
||||||
trackers: Option<HashSet<Url>>,
|
|
||||||
) -> Result<Self> {
|
|
||||||
// prevent from reading of the incomplete file
|
|
||||||
let tmp = PathBuf::from_str(&format!("{filepath}.tmp"))?;
|
|
||||||
|
|
||||||
// init public destination
|
|
||||||
let target = PathBuf::from_str(filepath)?;
|
|
||||||
if target.is_dir() {
|
|
||||||
bail!("RSS path `{}` is directory", target.to_string_lossy())
|
|
||||||
}
|
|
||||||
// init temporary file to write
|
|
||||||
let mut file = File::create(&tmp)?;
|
|
||||||
|
|
||||||
file.write_all(
|
|
||||||
b"<?xml version=\"1.0\" encoding=\"UTF-8\"?><rss version=\"2.0\"><channel>",
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let t = chrono::Utc::now().to_rfc2822();
|
|
||||||
file.write_all(b"<pubDate>")?;
|
|
||||||
file.write_all(t.as_bytes())?;
|
|
||||||
file.write_all(b"</pubDate>")?;
|
|
||||||
file.write_all(b"<lastBuildDate>")?;
|
|
||||||
file.write_all(t.as_bytes())?;
|
|
||||||
file.write_all(b"</lastBuildDate>")?;
|
|
||||||
|
|
||||||
file.write_all(b"<title>")?;
|
|
||||||
file.write_all(escape(title).as_bytes())?;
|
|
||||||
file.write_all(b"</title>")?;
|
|
||||||
|
|
||||||
if let Some(s) = description {
|
|
||||||
file.write_all(b"<description>")?;
|
|
||||||
file.write_all(escape(s).as_bytes())?;
|
|
||||||
file.write_all(b"</description>")?
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(s) = link {
|
|
||||||
file.write_all(b"<link>")?;
|
|
||||||
file.write_all(escape(s).as_bytes())?;
|
|
||||||
file.write_all(b"</link>")?
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
file,
|
|
||||||
target,
|
|
||||||
trackers,
|
|
||||||
tmp,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Append `item` to the feed `channel`
|
|
||||||
pub fn push(
|
|
||||||
&mut self,
|
|
||||||
infohash: &str,
|
|
||||||
title: &str,
|
|
||||||
description: Option<String>,
|
|
||||||
pub_date: Option<&str>,
|
|
||||||
) -> Result<()> {
|
|
||||||
self.file.write_all(
|
|
||||||
format!(
|
|
||||||
"<item><guid>{infohash}</guid><title>{}</title><link>{}</link>",
|
|
||||||
escape(title),
|
|
||||||
escape(&crate::magnet(infohash, self.trackers.as_ref()))
|
|
||||||
)
|
|
||||||
.as_bytes(),
|
|
||||||
)?;
|
|
||||||
if let Some(s) = description {
|
|
||||||
self.file.write_all(b"<description>")?;
|
|
||||||
self.file.write_all(escape(&s).as_bytes())?;
|
|
||||||
self.file.write_all(b"</description>")?
|
|
||||||
}
|
|
||||||
if let Some(s) = pub_date {
|
|
||||||
self.file.write_all(b"<pubDate>")?;
|
|
||||||
self.file.write_all(escape(s).as_bytes())?;
|
|
||||||
self.file.write_all(b"</pubDate>")?
|
|
||||||
}
|
|
||||||
self.file.write_all(b"</item>")?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write final bytes, replace public file with temporary one
|
|
||||||
pub fn commit(mut self) -> Result<()> {
|
|
||||||
self.file.write_all(b"</channel></rss>")?;
|
|
||||||
std::fs::rename(self.tmp, self.target)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn item_description(
|
|
||||||
size: Option<u64>,
|
|
||||||
list: Option<&Vec<(Option<String>, u64)>>,
|
|
||||||
) -> Option<String> {
|
|
||||||
use crate::format::Format;
|
|
||||||
if size.is_none() && list.is_none() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let mut b = Vec::with_capacity(list.map(|l| l.len()).unwrap_or_default() + 1);
|
|
||||||
if let Some(s) = size {
|
|
||||||
b.push(s.bytes())
|
|
||||||
}
|
|
||||||
if let Some(l) = list {
|
|
||||||
for (path, size) in l {
|
|
||||||
b.push(format!(
|
|
||||||
"{} ({})",
|
|
||||||
path.as_deref().unwrap_or("?"), // @TODO invalid encoding
|
|
||||||
size.bytes()
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(b.join("\n"))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn escape(subject: &str) -> String {
|
|
||||||
subject
|
|
||||||
.replace('&', "&amp;")
|
|
||||||
.replace('<', "&lt;")
|
|
||||||
.replace('>', "&gt;")
|
|
||||||
.replace('"', "&quot;")
|
|
||||||
.replace("'", "&apos;")
|
|
||||||
}
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
use anyhow::Result;
|
|
||||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
|
||||||
|
|
||||||
pub struct Torrent {
|
|
||||||
storage: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Torrent {
|
|
||||||
pub fn init(path: &str) -> Result<Self> {
|
|
||||||
Ok(Self {
|
|
||||||
storage: PathBuf::from_str(path)?.canonicalize()?,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn persist(&self, infohash: &str, data: &[u8]) -> Result<Option<PathBuf>> {
|
|
||||||
Ok(if self.path(infohash).exists() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
let p = self.path(infohash);
|
|
||||||
let mut f = fs::File::create(&p)?;
|
|
||||||
f.write_all(data)?;
|
|
||||||
Some(p)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn path(&self, infohash: &str) -> PathBuf {
|
|
||||||
let mut p = PathBuf::new();
|
|
||||||
p.push(&self.storage);
|
|
||||||
p.push(format!("{infohash}.torrent"));
|
|
||||||
p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
use std::{collections::HashSet, str::FromStr};
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
pub struct Trackers(HashSet<Url>);
|
|
||||||
|
|
||||||
impl Trackers {
|
|
||||||
pub fn init(trackers: &Vec<String>) -> anyhow::Result<Self> {
|
|
||||||
let mut t = HashSet::with_capacity(trackers.len());
|
|
||||||
for tracker in trackers {
|
|
||||||
t.insert(Url::from_str(tracker)?);
|
|
||||||
}
|
|
||||||
Ok(Self(t))
|
|
||||||
}
|
|
||||||
pub fn is_empty(&self) -> bool {
|
|
||||||
self.0.is_empty()
|
|
||||||
}
|
|
||||||
pub fn list(&self) -> &HashSet<Url> {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue