mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
remove extra features
This commit is contained in:
parent
1395257882
commit
8cbae5019d
12 changed files with 153 additions and 972 deletions
10
Cargo.toml
10
Cargo.toml
|
|
@ -4,25 +4,25 @@ version = "0.2.0"
|
|||
edition = "2024"
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
description = "SSD-friendly crawler for the Aquatic BitTorrent tracker based on librqbit API"
|
||||
keywords = ["aquatic", "librqbit", "rqbit", "crawler", "bittorrent"]
|
||||
description = "SSD-friendly crawler for the Aquatic BitTorrent tracker, based on the librqbit API"
|
||||
keywords = ["aquatic", "librqbit", "bittorrent", "crawler", "resolver"]
|
||||
categories = ["network-programming"]
|
||||
repository = "https://github.com/YGGverse/aquatic-crawler"
|
||||
# homepage = "https://yggverse.github.io"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
chrono = "0.4.41"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
hyper-util = "0.1"
|
||||
librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]}
|
||||
log = "0.4"
|
||||
regex = "1.11"
|
||||
tokio = { version = "1.45", features = ["full"] }
|
||||
tracing-subscriber = "0.3"
|
||||
url = "2.5"
|
||||
urlencoding = "2.1"
|
||||
voca_rs = "1.15"
|
||||
walkdir = "2.5"
|
||||
|
||||
[patch.crates-io]
|
||||
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
||||
#librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" }
|
||||
166
README.md
166
README.md
|
|
@ -4,37 +4,11 @@
|
|||
[](https://deps.rs/repo/github/YGGverse/aquatic-crawler)
|
||||
[](https://crates.io/crates/aquatic-crawler)
|
||||
|
||||
SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker based on [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API
|
||||
SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker, based on the [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API
|
||||
|
||||
> [!NOTE]
|
||||
> Compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API))
|
||||
|
||||
## Conception
|
||||
|
||||
See the project [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki)
|
||||
|
||||
## Features
|
||||
|
||||
> [!TIP]
|
||||
> For details on all implemented features, see the [Options](#options) section
|
||||
|
||||
* Info-hash versions
|
||||
* [x] 1
|
||||
* [ ] 2
|
||||
* Import sources
|
||||
* [x] IPv4 / IPv6 info-hash binary API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||
* [x] local file path
|
||||
* [ ] remote URL
|
||||
* Export options
|
||||
* [x] Content (`--preload`)
|
||||
* [x] data match the regex pattern (`--preload-regex`)
|
||||
* [x] data match limits (see `--preload-*` options group)
|
||||
* [x] Resolved `.torrent` files (`--export-torrents`)
|
||||
* [x] RSS feed (`--export-rss`) includes resolved torrent meta and magnet links to download
|
||||
* customize feed options with `--export-rss-*` options group
|
||||
* [ ] [Gemtext](https://geminiprotocol.net/docs/gemtext.gmi) static files catalog
|
||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search index
|
||||
* [ ] SQLite database index
|
||||
> * requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), see the [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic) for more details
|
||||
> * compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API))
|
||||
|
||||
## Install
|
||||
|
||||
|
|
@ -53,140 +27,12 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
|||
--infohash /path/to/another-source.bin\
|
||||
--tracker udp://host1:port\
|
||||
--tracker udp://host2:port\
|
||||
--preload /path/to/directory\
|
||||
--enable-tcp
|
||||
--preload /path/to/directory
|
||||
```
|
||||
* append `RUST_LOG=debug` to debug
|
||||
|
||||
### Options
|
||||
|
||||
``` bash
|
||||
-d, --debug
|
||||
Print debug output
|
||||
|
||||
--infohash <INFOHASH>
|
||||
Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
||||
* PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||
|
||||
--tracker <TRACKER>
|
||||
Define custom tracker(s) to preload the `.torrent` files info
|
||||
|
||||
--initial-peer <INITIAL_PEER>
|
||||
Define initial peer(s) to preload the `.torrent` files info
|
||||
|
||||
--export-torrents <EXPORT_TORRENTS>
|
||||
Save resolved torrent files to given directory
|
||||
|
||||
--export-rss <EXPORT_RSS>
|
||||
File path to export RSS feed
|
||||
|
||||
--export-rss-title <EXPORT_RSS_TITLE>
|
||||
Custom title for RSS feed (channel)
|
||||
|
||||
[default: aquatic-crawler]
|
||||
|
||||
--export-rss-link <EXPORT_RSS_LINK>
|
||||
Custom link for RSS feed (channel)
|
||||
|
||||
--export-rss-description <EXPORT_RSS_DESCRIPTION>
|
||||
Custom description for RSS feed (channel)
|
||||
|
||||
--export-trackers
|
||||
Appends `--tracker` value to magnets and torrents
|
||||
|
||||
--enable-dht
|
||||
Enable DHT resolver
|
||||
|
||||
--enable-tcp
|
||||
Enable TCP connection
|
||||
|
||||
--bind <BIND>
|
||||
Bind resolver session on specified device name (`tun0`, `mycelium`, etc.)
|
||||
|
||||
--listen <LISTEN>
|
||||
Bind listener on specified `host:port` (`[host]:port` for IPv6)
|
||||
|
||||
* this option is useful only for binding the data exchange service,
|
||||
to restrict the outgoing connections for torrent resolver, use `bind` option instead
|
||||
|
||||
--listen-upnp
|
||||
Enable UPnP forwarding
|
||||
|
||||
--enable-upload
|
||||
Enable upload (share bytes received with BitTorrent network)
|
||||
|
||||
--preload <PRELOAD>
|
||||
Directory path to store preloaded data (e.g. `.torrent` files)
|
||||
|
||||
--preload-clear
|
||||
Clear previous data collected on crawl session start
|
||||
|
||||
--preload-regex <PRELOAD_REGEX>
|
||||
Preload only files match regex pattern (list only without preload by default)
|
||||
* see also `preload_max_filesize`, `preload_max_filecount` options
|
||||
|
||||
## Example:
|
||||
|
||||
Filter by image ext ``` --preload-regex '(png|gif|jpeg|jpg|webp)$' ```
|
||||
|
||||
* requires `storage` argument defined
|
||||
|
||||
--preload-total-size <PRELOAD_TOTAL_SIZE>
|
||||
Stop crawler on total preload files size reached
|
||||
|
||||
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
|
||||
Max size sum of preloaded files per torrent (match `preload_regex`)
|
||||
|
||||
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
|
||||
Max count of preloaded files per torrent (match `preload_regex`)
|
||||
|
||||
--proxy-url <PROXY_URL>
|
||||
Use `socks5://[username:password@]host:port`
|
||||
|
||||
--peer-connect-timeout <PEER_CONNECT_TIMEOUT>
|
||||
|
||||
|
||||
--peer-read-write-timeout <PEER_READ_WRITE_TIMEOUT>
|
||||
|
||||
|
||||
--peer-keep-alive-interval <PEER_KEEP_ALIVE_INTERVAL>
|
||||
|
||||
|
||||
--index-capacity <INDEX_CAPACITY>
|
||||
Estimated info-hash index capacity
|
||||
|
||||
[default: 1000]
|
||||
|
||||
--index-list
|
||||
Index torrent files
|
||||
|
||||
--index-list-limit <INDEX_LIST_LIMIT>
|
||||
Limit torrent files quantity to index
|
||||
* insert the `...` placeholder as the last item, with total size left
|
||||
|
||||
[default: 100]
|
||||
|
||||
--index-timeout <INDEX_TIMEOUT>
|
||||
Remove records from index older than `seconds`
|
||||
|
||||
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
||||
Max time to handle each torrent
|
||||
|
||||
[default: 10]
|
||||
|
||||
--sleep <SLEEP>
|
||||
Crawl loop delay in seconds
|
||||
|
||||
[default: 300]
|
||||
|
||||
--upload-limit <UPLOAD_LIMIT>
|
||||
Limit upload speed (b/s)
|
||||
|
||||
--download-limit <DOWNLOAD_LIMIT>
|
||||
Limit download speed (b/s)
|
||||
|
||||
-h, --help
|
||||
Print help (see a summary with '-h')
|
||||
|
||||
-V, --version
|
||||
Print version
|
||||
aquatic-crawler --help
|
||||
```
|
||||
|
|
@ -1,12 +1,11 @@
|
|||
use clap::Parser;
|
||||
use regex::Regex;
|
||||
use std::{net::SocketAddr, path::PathBuf};
|
||||
use url::Url;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
pub struct Config {
|
||||
/// Print debug output
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
pub debug: bool,
|
||||
|
||||
/// Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
||||
///
|
||||
/// * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||
|
|
@ -15,31 +14,11 @@ pub struct Config {
|
|||
|
||||
/// Define custom tracker(s) to preload the `.torrent` files info
|
||||
#[arg(long)]
|
||||
pub tracker: Vec<String>,
|
||||
pub tracker: Vec<Url>,
|
||||
|
||||
/// Define initial peer(s) to preload the `.torrent` files info
|
||||
#[arg(long)]
|
||||
pub initial_peer: Vec<String>,
|
||||
|
||||
/// Save resolved torrent files to given directory
|
||||
#[arg(long)]
|
||||
pub export_torrents: Option<String>,
|
||||
|
||||
/// File path to export RSS feed
|
||||
#[arg(long)]
|
||||
pub export_rss: Option<String>,
|
||||
|
||||
/// Custom title for RSS feed (channel)
|
||||
#[arg(long, default_value_t = String::from("aquatic-crawler"))]
|
||||
pub export_rss_title: String,
|
||||
|
||||
/// Custom link for RSS feed (channel)
|
||||
#[arg(long)]
|
||||
pub export_rss_link: Option<String>,
|
||||
|
||||
/// Custom description for RSS feed (channel)
|
||||
#[arg(long)]
|
||||
pub export_rss_description: Option<String>,
|
||||
pub initial_peer: Option<Vec<SocketAddr>>,
|
||||
|
||||
/// Appends `--tracker` value to magnets and torrents
|
||||
#[arg(long, default_value_t = false)]
|
||||
|
|
@ -49,9 +28,9 @@ pub struct Config {
|
|||
#[arg(long, default_value_t = false)]
|
||||
pub enable_dht: bool,
|
||||
|
||||
/// Enable TCP connection
|
||||
/// Disable TCP connection
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub enable_tcp: bool,
|
||||
pub disable_tcp: bool,
|
||||
|
||||
/// Bind resolver session on specified device name (`tun0`, `mycelium`, etc.)
|
||||
#[arg(long)]
|
||||
|
|
@ -74,11 +53,7 @@ pub struct Config {
|
|||
|
||||
/// Directory path to store preloaded data (e.g. `.torrent` files)
|
||||
#[arg(long)]
|
||||
pub preload: Option<String>,
|
||||
|
||||
/// Clear previous data collected on crawl session start
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub preload_clear: bool,
|
||||
pub preload: PathBuf,
|
||||
|
||||
/// Preload only files match regex pattern (list only without preload by default)
|
||||
/// * see also `preload_max_filesize`, `preload_max_filecount` options
|
||||
|
|
@ -92,11 +67,7 @@ pub struct Config {
|
|||
///
|
||||
/// * requires `storage` argument defined
|
||||
#[arg(long)]
|
||||
pub preload_regex: Option<String>,
|
||||
|
||||
/// Stop crawler on total preload files size reached
|
||||
#[arg(long)]
|
||||
pub preload_total_size: Option<u64>,
|
||||
pub preload_regex: Option<Regex>,
|
||||
|
||||
/// Max size sum of preloaded files per torrent (match `preload_regex`)
|
||||
#[arg(long)]
|
||||
|
|
@ -108,7 +79,7 @@ pub struct Config {
|
|||
|
||||
/// Use `socks5://[username:password@]host:port`
|
||||
#[arg(long)]
|
||||
pub proxy_url: Option<String>,
|
||||
pub proxy_url: Option<Url>,
|
||||
|
||||
// Peer options
|
||||
#[arg(long)]
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
pub trait Format {
|
||||
/// Format bytes to KB/MB/GB presentation
|
||||
fn bytes(self) -> String;
|
||||
}
|
||||
|
||||
impl Format for u64 {
|
||||
fn bytes(self) -> String {
|
||||
const KB: f32 = 1024.0;
|
||||
const MB: f32 = KB * KB;
|
||||
const GB: f32 = MB * KB;
|
||||
|
||||
let f = self as f32;
|
||||
|
||||
if f < KB {
|
||||
format!("{self} B")
|
||||
} else if f < MB {
|
||||
format!("{:.2} KB", f / KB)
|
||||
} else if f < GB {
|
||||
format!("{:.2} MB", f / MB)
|
||||
} else {
|
||||
format!("{:.2} GB", f / GB)
|
||||
}
|
||||
}
|
||||
}
|
||||
111
src/index.rs
111
src/index.rs
|
|
@ -1,111 +0,0 @@
|
|||
mod value;
|
||||
|
||||
use chrono::{Duration, Utc};
|
||||
use std::collections::HashMap;
|
||||
use value::Value;
|
||||
|
||||
/// Collect processed info hashes to skip on the next iterations (for this session)
|
||||
/// * also contains optional meta info to export index as RSS or any other format
|
||||
pub struct Index {
|
||||
index: HashMap<String, Value>,
|
||||
/// Removes outdated values from `index` on `Self::refresh` action
|
||||
timeout: Option<Duration>,
|
||||
/// Track index changes to prevent extra disk write operations (safe SSD life)
|
||||
/// * useful in the static RSS feed generation case, if enabled
|
||||
is_changed: bool,
|
||||
/// Store the index value in memory only when it is in use by the init options
|
||||
has_name: bool,
|
||||
has_size: bool,
|
||||
has_list: bool,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn init(
|
||||
capacity: usize,
|
||||
timeout: Option<i64>,
|
||||
has_name: bool,
|
||||
has_size: bool,
|
||||
has_list: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
index: HashMap::with_capacity(capacity),
|
||||
timeout: timeout.map(Duration::seconds),
|
||||
has_size,
|
||||
has_name,
|
||||
has_list,
|
||||
is_changed: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has(&self, infohash: &str) -> bool {
|
||||
self.index.contains_key(infohash)
|
||||
}
|
||||
|
||||
pub fn is_changed(&self) -> bool {
|
||||
self.is_changed
|
||||
}
|
||||
|
||||
pub fn list(&self) -> &HashMap<String, Value> {
|
||||
&self.index
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.index.len()
|
||||
}
|
||||
|
||||
pub fn nodes(&self) -> u64 {
|
||||
self.index.values().map(|i| i.node).sum::<u64>()
|
||||
}
|
||||
|
||||
pub fn insert(
|
||||
&mut self,
|
||||
infohash: String,
|
||||
node: u64,
|
||||
size: u64,
|
||||
list: Option<Vec<(Option<String>, u64)>>,
|
||||
name: Option<String>,
|
||||
) {
|
||||
if self
|
||||
.index
|
||||
.insert(
|
||||
infohash,
|
||||
Value::new(
|
||||
node,
|
||||
if self.has_size { Some(size) } else { None },
|
||||
if self.has_name { name } else { None },
|
||||
if self.has_list { list } else { None },
|
||||
),
|
||||
)
|
||||
.is_none()
|
||||
{
|
||||
self.is_changed = true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn refresh(&mut self) {
|
||||
if let Some(timeout) = self.timeout {
|
||||
let t = Utc::now();
|
||||
self.index.retain(|_, v| t - v.time <= timeout)
|
||||
}
|
||||
self.is_changed = false
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test() {
|
||||
use std::{thread::sleep, time::Duration};
|
||||
|
||||
// test values auto-clean by timeout
|
||||
let mut i = Index::init(2, Some(3), false, false, false);
|
||||
|
||||
i.insert("h1".to_string(), 0, 0, None, None);
|
||||
sleep(Duration::from_secs(1));
|
||||
i.insert("h2".to_string(), 0, 0, None, None);
|
||||
|
||||
i.refresh();
|
||||
assert_eq!(i.len(), 2);
|
||||
|
||||
sleep(Duration::from_secs(2));
|
||||
i.refresh();
|
||||
assert_eq!(i.len(), 1)
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
use chrono::{DateTime, Utc};
|
||||
use voca_rs::Voca;
|
||||
|
||||
/// The `Index` value
|
||||
pub struct Value {
|
||||
pub time: DateTime<Utc>,
|
||||
pub node: u64,
|
||||
// Isolate by applying internal filter on value set
|
||||
size: Option<u64>,
|
||||
name: Option<String>,
|
||||
list: Option<Vec<(Option<String>, u64)>>,
|
||||
}
|
||||
|
||||
impl Value {
|
||||
/// Create new `Self` with current timestamp
|
||||
pub fn new(
|
||||
node: u64,
|
||||
size: Option<u64>,
|
||||
name: Option<String>,
|
||||
list: Option<Vec<(Option<String>, u64)>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
time: Utc::now(),
|
||||
node,
|
||||
size,
|
||||
list: list.map(|f| f.into_iter().map(|(n, l)| (filter(n), l)).collect()),
|
||||
name: filter(name),
|
||||
}
|
||||
}
|
||||
/// Get reference to the safely constructed `name` member
|
||||
pub fn name(&self) -> Option<&String> {
|
||||
self.name.as_ref()
|
||||
}
|
||||
/// Get reference to the safely constructed files `list` member
|
||||
pub fn list(&self) -> Option<&Vec<(Option<String>, u64)>> {
|
||||
self.list.as_ref()
|
||||
}
|
||||
/// Get reference to the safely constructed `length` member
|
||||
pub fn size(&self) -> Option<u64> {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip tags and bom chars, crop long strings (prevents memory pool overload)
|
||||
fn filter(value: Option<String>) -> Option<String> {
|
||||
value.map(|v| {
|
||||
const C: usize = 125; // + 3 for `...` offset, 128 chars max @TODO optional
|
||||
let s = v._strip_bom()._strip_tags();
|
||||
if s.chars().count() > C {
|
||||
return format!("{}...", s.chars().take(C).collect::<String>());
|
||||
}
|
||||
s
|
||||
})
|
||||
}
|
||||
351
src/main.rs
351
src/main.rs
|
|
@ -1,25 +1,15 @@
|
|||
mod api;
|
||||
mod config;
|
||||
mod format;
|
||||
mod index;
|
||||
mod peers;
|
||||
mod preload;
|
||||
mod rss;
|
||||
mod torrent;
|
||||
mod trackers;
|
||||
|
||||
use anyhow::Result;
|
||||
use config::Config;
|
||||
use index::Index;
|
||||
use librqbit::{
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions,
|
||||
ListenerOptions, PeerConnectionOptions, SessionOptions, ValidatedTorrentMetaV1Info,
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, ListenerOptions,
|
||||
PeerConnectionOptions, SessionOptions,
|
||||
};
|
||||
use peers::Peers;
|
||||
use rss::Rss;
|
||||
use std::{collections::HashSet, num::NonZero, path::PathBuf, str::FromStr, time::Duration};
|
||||
use torrent::Torrent;
|
||||
use trackers::Trackers;
|
||||
use preload::Preload;
|
||||
use std::{collections::HashSet, num::NonZero, str::FromStr, time::Duration};
|
||||
use url::Url;
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -27,29 +17,21 @@ async fn main() -> Result<()> {
|
|||
use chrono::Local;
|
||||
use clap::Parser;
|
||||
use tokio::time;
|
||||
|
||||
// init debug
|
||||
if std::env::var("RUST_LOG").is_ok() {
|
||||
tracing_subscriber::fmt::init()
|
||||
} // librqbit
|
||||
// init components
|
||||
let time_init = Local::now();
|
||||
let config = Config::parse();
|
||||
if std::env::var("RUST_LOG").is_ok() {
|
||||
tracing_subscriber::fmt::init()
|
||||
}
|
||||
let peers = Peers::init(&config.initial_peer)?;
|
||||
let preload = preload::init(
|
||||
let preload = Preload::init(
|
||||
config.preload,
|
||||
config.preload_regex,
|
||||
config.preload_max_filecount,
|
||||
config.preload_max_filesize,
|
||||
config.preload_total_size,
|
||||
config.preload_clear,
|
||||
)?;
|
||||
let trackers = Trackers::init(&config.tracker)?;
|
||||
let torrent = config.export_torrents.map(|p| Torrent::init(&p).unwrap());
|
||||
let session = librqbit::Session::new_with_opts(
|
||||
match preload {
|
||||
Some(ref p) => p.path(),
|
||||
None => PathBuf::new(),
|
||||
},
|
||||
preload.root().clone(),
|
||||
SessionOptions {
|
||||
bind_device_name: config.bind,
|
||||
listen: match config.listen {
|
||||
|
|
@ -66,8 +48,8 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
},
|
||||
connect: Some(ConnectionOptions {
|
||||
enable_tcp: config.enable_tcp,
|
||||
proxy_url: config.proxy_url,
|
||||
enable_tcp: !config.disable_tcp,
|
||||
proxy_url: config.proxy_url.map(|u| u.to_string()),
|
||||
peer_opts: Some(PeerConnectionOptions {
|
||||
connect_timeout: config.peer_connect_timeout.map(Duration::from_secs),
|
||||
read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs),
|
||||
|
|
@ -82,54 +64,35 @@ async fn main() -> Result<()> {
|
|||
upload_bps: config.upload_limit.and_then(NonZero::new),
|
||||
download_bps: config.download_limit.and_then(NonZero::new),
|
||||
},
|
||||
trackers: trackers.list().clone(),
|
||||
trackers: config.tracker.iter().cloned().collect(),
|
||||
..SessionOptions::default()
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
// begin
|
||||
println!("Crawler started on {time_init}");
|
||||
let mut index = Index::init(
|
||||
config.index_capacity,
|
||||
config.index_timeout,
|
||||
config.export_rss.is_some(),
|
||||
config.export_rss.is_some(),
|
||||
config.export_rss.is_some() && config.index_list,
|
||||
);
|
||||
log::info!("Crawler started on {time_init}");
|
||||
loop {
|
||||
let time_queue = Local::now();
|
||||
if config.debug {
|
||||
println!("\tQueue crawl begin on {time_queue}...")
|
||||
}
|
||||
index.refresh();
|
||||
log::debug!("Queue crawl begin on {time_queue}...");
|
||||
for source in &config.infohash {
|
||||
if config.debug {
|
||||
println!("\tIndex source `{source}`...")
|
||||
}
|
||||
log::debug!("Index source `{source}`...");
|
||||
// grab latest info-hashes from this source
|
||||
// * aquatic server may update the stats at this moment, handle result manually
|
||||
for i in match api::get(source, config.index_capacity) {
|
||||
Some(i) => i,
|
||||
None => {
|
||||
// skip without panic
|
||||
if config.debug {
|
||||
eprintln!(
|
||||
"The feed `{source}` has an incomplete format (or is still updating); skip."
|
||||
)
|
||||
}
|
||||
log::warn!(
|
||||
"The feed `{source}` has an incomplete format (or is still updating); skip."
|
||||
);
|
||||
continue;
|
||||
}
|
||||
} {
|
||||
// convert to string once
|
||||
let i = i.to_string();
|
||||
// is already indexed?
|
||||
if index.has(&i) {
|
||||
if preload.contains_torrent(&i)? {
|
||||
continue;
|
||||
}
|
||||
if config.debug {
|
||||
println!("\t\tIndex `{i}`...")
|
||||
}
|
||||
log::debug!("Index `{i}`...");
|
||||
// run the crawler in single thread for performance reasons,
|
||||
// use `timeout` argument option to skip the dead connections.
|
||||
match time::timeout(
|
||||
|
|
@ -137,18 +100,18 @@ async fn main() -> Result<()> {
|
|||
session.add_torrent(
|
||||
AddTorrent::from_url(magnet(
|
||||
&i,
|
||||
if config.export_trackers && !trackers.is_empty() {
|
||||
Some(trackers.list())
|
||||
} else {
|
||||
if config.tracker.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(config.tracker.as_ref())
|
||||
},
|
||||
)),
|
||||
Some(AddTorrentOptions {
|
||||
paused: true, // continue after `only_files` init
|
||||
overwrite: true,
|
||||
disable_trackers: trackers.is_empty(),
|
||||
initial_peers: peers.initial_peers(),
|
||||
list_only: preload.as_ref().is_none_or(|p| p.regex.is_none()),
|
||||
disable_trackers: config.tracker.is_empty(),
|
||||
initial_peers: config.initial_peer.clone(),
|
||||
list_only: false,
|
||||
// it is important to blacklist all files preload until initiation
|
||||
only_files: Some(Vec::with_capacity(
|
||||
config.preload_max_filecount.unwrap_or_default(),
|
||||
|
|
@ -156,8 +119,9 @@ async fn main() -> Result<()> {
|
|||
// the destination folder to preload files match `only_files_regex`
|
||||
// * e.g. images for audio albums
|
||||
output_folder: preload
|
||||
.as_ref()
|
||||
.map(|p| p.output_folder(&i, true).unwrap()),
|
||||
.output_folder(&i)?
|
||||
.to_str()
|
||||
.map(|s| s.to_string()),
|
||||
..Default::default()
|
||||
}),
|
||||
),
|
||||
|
|
@ -167,183 +131,79 @@ async fn main() -> Result<()> {
|
|||
Ok(r) => match r {
|
||||
// on `preload_regex` case only
|
||||
Ok(AddTorrentResponse::Added(id, mt)) => {
|
||||
let mut only_files_size = 0;
|
||||
let mut only_files_keep = Vec::with_capacity(
|
||||
let mut keep_files = HashSet::with_capacity(
|
||||
config.preload_max_filecount.unwrap_or_default(),
|
||||
);
|
||||
let mut only_files = HashSet::with_capacity(
|
||||
config.preload_max_filecount.unwrap_or_default(),
|
||||
);
|
||||
mt.wait_until_initialized().await?;
|
||||
let (name, size, list) = mt.with_metadata(|m| {
|
||||
// init preload files list
|
||||
if let Some(ref p) = preload {
|
||||
for (id, info) in m.file_infos.iter().enumerate() {
|
||||
if p.matches(info.relative_filename.to_str().unwrap()) {
|
||||
if p.max_filesize.is_some_and(|limit| {
|
||||
only_files_size + info.len > limit
|
||||
}) {
|
||||
if config.debug {
|
||||
println!(
|
||||
"\t\t\ttotal files size limit `{i}` reached!"
|
||||
)
|
||||
}
|
||||
break;
|
||||
}
|
||||
if p.max_filecount
|
||||
.is_some_and(|limit| only_files.len() + 1 > limit)
|
||||
{
|
||||
if config.debug {
|
||||
println!(
|
||||
"\t\t\ttotal files count limit for `{i}` reached!"
|
||||
)
|
||||
}
|
||||
break;
|
||||
}
|
||||
only_files_size += info.len;
|
||||
if let Some(ref p) = preload {
|
||||
only_files_keep
|
||||
.push(p.absolute(&i, &info.relative_filename))
|
||||
}
|
||||
only_files.insert(id);
|
||||
}
|
||||
let bytes = mt.with_metadata(|m| {
|
||||
for (id, info) in m.file_infos.iter().enumerate() {
|
||||
if preload
|
||||
.max_filecount
|
||||
.is_some_and(|limit| only_files.len() + 1 > limit)
|
||||
{
|
||||
log::debug!(
|
||||
"file count limit reached, skip `{id}` for `{i}`"
|
||||
);
|
||||
break;
|
||||
}
|
||||
if preload.max_filesize.is_some_and(|limit| info.len > limit) {
|
||||
log::debug!(
|
||||
"file size limit reached, skip `{id}` for `{i}`"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if preload.regex.as_ref().is_some_and(|r| {
|
||||
!r.is_match(&info.relative_filename.to_string_lossy())
|
||||
}) {
|
||||
log::debug!("regex filter, skip `{id}` for `{i}`");
|
||||
continue;
|
||||
}
|
||||
assert!(keep_files.insert(info.relative_filename.clone()));
|
||||
assert!(only_files.insert(id));
|
||||
}
|
||||
if let Some(ref t) = torrent {
|
||||
save_torrent_file(t, &i, &m.torrent_bytes, config.debug)
|
||||
}
|
||||
|
||||
(
|
||||
m.info.name().as_ref().map(|n| n.to_string()),
|
||||
size(&m.info),
|
||||
list(&m.info, config.index_list_limit),
|
||||
)
|
||||
m.torrent_bytes.to_vec()
|
||||
})?;
|
||||
session.update_only_files(&mt, &only_files).await?;
|
||||
session.unpause(&mt).await?;
|
||||
// await for `preload_regex` files download to continue
|
||||
mt.wait_until_completed().await?;
|
||||
// cleanup irrelevant files (see rqbit#408)
|
||||
preload.cleanup(&i, Some(keep_files))?;
|
||||
preload.persist_torrent_bytes(&i, &bytes)?;
|
||||
// remove torrent from session as indexed
|
||||
session
|
||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||
.await?;
|
||||
// cleanup irrelevant files (see rqbit#408)
|
||||
if let Some(p) = &preload {
|
||||
p.cleanup(&i, Some(only_files_keep))?
|
||||
}
|
||||
|
||||
if config.debug {
|
||||
println!("\t\t\tadd `{i}` to index.")
|
||||
}
|
||||
|
||||
index.insert(
|
||||
i,
|
||||
only_files_size,
|
||||
size,
|
||||
list,
|
||||
name.map(|n| n.to_string()),
|
||||
)
|
||||
log::debug!("torrent `{i}` indexed.")
|
||||
}
|
||||
Ok(AddTorrentResponse::ListOnly(r)) => {
|
||||
if let Some(ref t) = torrent {
|
||||
save_torrent_file(t, &i, &r.torrent_bytes, config.debug)
|
||||
}
|
||||
|
||||
// @TODO
|
||||
// use `r.info` for Memory, SQLite,
|
||||
// Manticore and other alternative storage type
|
||||
|
||||
if config.debug {
|
||||
println!("\t\t\tadd `{i}` to index.")
|
||||
}
|
||||
|
||||
index.insert(
|
||||
i,
|
||||
0,
|
||||
size(&r.info),
|
||||
list(&r.info, config.index_list_limit),
|
||||
r.info.name().map(|n| n.to_string()),
|
||||
)
|
||||
}
|
||||
// unexpected as should be deleted
|
||||
Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
|
||||
Err(e) => eprintln!("Failed to resolve `{i}`: `{e}`."),
|
||||
Ok(_) => panic!(),
|
||||
Err(e) => log::debug!("Failed to resolve `{i}`: `{e}`."),
|
||||
},
|
||||
Err(e) => {
|
||||
if config.debug {
|
||||
println!("\t\t\tfailed to resolve `{i}`: `{e}`")
|
||||
}
|
||||
}
|
||||
Err(e) => log::debug!("failed to resolve `{i}`: `{e}`"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref export_rss) = config.export_rss
|
||||
&& index.is_changed()
|
||||
{
|
||||
let mut rss = Rss::new(
|
||||
export_rss,
|
||||
&config.export_rss_title,
|
||||
&config.export_rss_link,
|
||||
&config.export_rss_description,
|
||||
if config.export_trackers && !trackers.is_empty() {
|
||||
Some(trackers.list().clone())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
)?;
|
||||
for (k, v) in index.list() {
|
||||
rss.push(
|
||||
k,
|
||||
v.name().unwrap_or(k),
|
||||
rss::item_description(v.size(), v.list()),
|
||||
Some(&v.time.to_rfc2822()),
|
||||
)?
|
||||
}
|
||||
rss.commit()?
|
||||
}
|
||||
if preload
|
||||
.as_ref()
|
||||
.is_some_and(|p| p.total_size.is_some_and(|s| index.nodes() > s))
|
||||
{
|
||||
panic!("Preload content size {} bytes reached!", 0)
|
||||
}
|
||||
if config.debug {
|
||||
println!(
|
||||
"Queue completed on {time_queue}\n\ttotal: {}\n\ttime: {} s\n\tuptime: {} s\n\tawait {} seconds to continue...",
|
||||
index.len(),
|
||||
Local::now()
|
||||
.signed_duration_since(time_queue)
|
||||
.as_seconds_f32(),
|
||||
Local::now()
|
||||
.signed_duration_since(time_init)
|
||||
.as_seconds_f32(),
|
||||
config.sleep,
|
||||
)
|
||||
}
|
||||
log::debug!(
|
||||
"Queue completed at {time_queue} (time: {} / uptime: {}) await {} seconds to continue...",
|
||||
Local::now()
|
||||
.signed_duration_since(time_queue)
|
||||
.as_seconds_f32(),
|
||||
Local::now()
|
||||
.signed_duration_since(time_init)
|
||||
.as_seconds_f32(),
|
||||
config.sleep,
|
||||
);
|
||||
std::thread::sleep(Duration::from_secs(config.sleep))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared handler function to save resolved torrents as file
|
||||
fn save_torrent_file(t: &Torrent, i: &str, b: &[u8], d: bool) {
|
||||
match t.persist(i, b) {
|
||||
Ok(r) => {
|
||||
if d {
|
||||
match r {
|
||||
Some(p) => println!("\t\t\tadd torrent file `{}`", p.to_string_lossy()),
|
||||
None => println!("\t\t\ttorrent file `{i}` already exists"),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!("Error on save torrent file `{i}`: `{e}`"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build magnet URI
|
||||
fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
||||
let mut m = if infohash.len() == 40 {
|
||||
format!("magnet:?xt=urn:btih:{infohash}")
|
||||
fn magnet(info_hash: &str, trackers: Option<&Vec<Url>>) -> String {
|
||||
let mut m = if info_hash.len() == 40 {
|
||||
format!("magnet:?xt=urn:btih:{info_hash}")
|
||||
} else {
|
||||
todo!("infohash v2 is not supported by librqbit")
|
||||
};
|
||||
|
|
@ -355,62 +215,3 @@ fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
|||
}
|
||||
m
|
||||
}
|
||||
|
||||
/// Count total size, including torrent files
|
||||
fn size(meta: &ValidatedTorrentMetaV1Info<ByteBufOwned>) -> u64 {
|
||||
let mut t = 0;
|
||||
if let Some(l) = meta.info().length {
|
||||
t += l
|
||||
}
|
||||
if let Some(ref files) = meta.info().files {
|
||||
for f in files {
|
||||
t += f.length
|
||||
}
|
||||
}
|
||||
t
|
||||
}
|
||||
|
||||
fn list(
|
||||
meta: &ValidatedTorrentMetaV1Info<ByteBufOwned>,
|
||||
limit: usize,
|
||||
) -> Option<Vec<(Option<String>, u64)>> {
|
||||
meta.info().files.as_ref().map(|files| {
|
||||
let mut b = Vec::with_capacity(files.len());
|
||||
let mut i = files.iter();
|
||||
let mut t = 0;
|
||||
for f in i.by_ref() {
|
||||
if t < limit {
|
||||
t += 1;
|
||||
b.push((
|
||||
String::from_utf8(
|
||||
f.path
|
||||
.iter()
|
||||
.enumerate()
|
||||
.flat_map(|(n, b)| {
|
||||
if n == 0 {
|
||||
b.0.to_vec()
|
||||
} else {
|
||||
let mut p = vec![b'/'];
|
||||
p.extend(b.0.to_vec());
|
||||
p
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
.ok(),
|
||||
f.length,
|
||||
));
|
||||
continue;
|
||||
}
|
||||
// limit reached: count sizes left and use placeholder as the last item name
|
||||
let mut l = 0;
|
||||
for f in i.by_ref() {
|
||||
l += f.length
|
||||
}
|
||||
b.push((Some("...".to_string()), l));
|
||||
break;
|
||||
}
|
||||
b[..t].sort_by(|a, b| a.0.cmp(&b.0)); // @TODO optional
|
||||
b
|
||||
})
|
||||
}
|
||||
|
|
|
|||
21
src/peers.rs
21
src/peers.rs
|
|
@ -1,21 +0,0 @@
|
|||
use std::{net::SocketAddr, str::FromStr};
|
||||
|
||||
pub struct Peers(Vec<SocketAddr>);
|
||||
|
||||
impl Peers {
|
||||
pub fn init(peers: &Vec<String>) -> anyhow::Result<Self> {
|
||||
let mut p = Vec::with_capacity(peers.len());
|
||||
for peer in peers {
|
||||
p.push(SocketAddr::from_str(peer)?);
|
||||
}
|
||||
Ok(Self(p))
|
||||
}
|
||||
|
||||
pub fn initial_peers(&self) -> Option<Vec<SocketAddr>> {
|
||||
if self.0.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(self.0.clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
143
src/preload.rs
143
src/preload.rs
|
|
@ -1,123 +1,90 @@
|
|||
use anyhow::{Result, bail};
|
||||
use regex::Regex;
|
||||
use std::{fs, path::PathBuf, str::FromStr};
|
||||
use std::{collections::HashSet, fs, path::PathBuf};
|
||||
|
||||
pub struct Preload {
|
||||
path: PathBuf,
|
||||
root: PathBuf,
|
||||
pub max_filecount: Option<usize>,
|
||||
pub max_filesize: Option<u64>,
|
||||
pub total_size: Option<u64>,
|
||||
pub regex: Option<Regex>,
|
||||
}
|
||||
|
||||
impl Preload {
|
||||
fn init(
|
||||
directory: &str,
|
||||
regex: Option<String>,
|
||||
// Constructors
|
||||
|
||||
pub fn init(
|
||||
root: PathBuf,
|
||||
regex: Option<Regex>,
|
||||
max_filecount: Option<usize>,
|
||||
max_filesize: Option<u64>,
|
||||
total_size: Option<u64>,
|
||||
is_clear: bool,
|
||||
) -> Result<Self> {
|
||||
let path = PathBuf::from_str(directory)?;
|
||||
if let Ok(t) = fs::metadata(&path) {
|
||||
if t.is_file() {
|
||||
bail!("Storage destination is not directory!");
|
||||
}
|
||||
if t.is_dir() && is_clear {
|
||||
for i in fs::read_dir(&path)? {
|
||||
let r = i?.path();
|
||||
if r.is_dir() {
|
||||
fs::remove_dir_all(&r)?;
|
||||
} else {
|
||||
fs::remove_file(&r)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !root.is_dir() {
|
||||
bail!("Preload root is not directory")
|
||||
}
|
||||
fs::create_dir_all(&path)?;
|
||||
Ok(Self {
|
||||
max_filecount,
|
||||
max_filesize,
|
||||
path,
|
||||
regex: regex.map(|r| Regex::new(&r).unwrap()),
|
||||
total_size,
|
||||
regex,
|
||||
root: root.canonicalize()?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn output_folder(&self, infohash: &str, create: bool) -> Result<String> {
|
||||
let mut p = PathBuf::new();
|
||||
p.push(&self.path);
|
||||
p.push(infohash);
|
||||
if p.is_file() {
|
||||
bail!("File destination is not directory!");
|
||||
}
|
||||
if create {
|
||||
fs::create_dir_all(&p)?;
|
||||
}
|
||||
if !p.is_dir() {
|
||||
bail!("Destination directory not exists!")
|
||||
}
|
||||
Ok(p.to_string_lossy().to_string())
|
||||
}
|
||||
|
||||
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
|
||||
let mut p = PathBuf::new();
|
||||
p.push(&self.path);
|
||||
p.push(infohash);
|
||||
p.push(file);
|
||||
p
|
||||
}
|
||||
// Actions
|
||||
|
||||
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
||||
pub fn cleanup(&self, infohash: &str, keep_filenames: Option<Vec<PathBuf>>) -> Result<()> {
|
||||
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
|
||||
pub fn cleanup(&self, info_hash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
|
||||
for e in walkdir::WalkDir::new(self.output_folder(info_hash)?) {
|
||||
let e = e?;
|
||||
let p = e.into_path();
|
||||
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
|
||||
fs::remove_file(p)?;
|
||||
}
|
||||
}
|
||||
} // remove empty directories @TODO
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn path(&self) -> PathBuf {
|
||||
self.path.clone()
|
||||
pub fn persist_torrent_bytes(&self, info_hash: &str, contents: &[u8]) -> Result<PathBuf> {
|
||||
let p = self.torrent(info_hash)?;
|
||||
fs::write(&p, contents)?;
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
pub fn matches(&self, pattern: &str) -> bool {
|
||||
self.regex.as_ref().is_some_and(|r| r.is_match(pattern))
|
||||
}
|
||||
}
|
||||
// Getters
|
||||
|
||||
/// Init `Preload` with validate related argument options
|
||||
pub fn init(
|
||||
path: Option<String>,
|
||||
regex: Option<String>,
|
||||
max_filecount: Option<usize>,
|
||||
max_filesize: Option<u64>,
|
||||
total_size: Option<u64>,
|
||||
is_clear: bool,
|
||||
) -> Result<Option<Preload>> {
|
||||
Ok(match path {
|
||||
Some(ref p) => Some(Preload::init(
|
||||
p,
|
||||
regex,
|
||||
max_filecount,
|
||||
max_filesize,
|
||||
total_size,
|
||||
is_clear,
|
||||
)?),
|
||||
None => {
|
||||
if regex.is_some()
|
||||
|| max_filecount.is_some()
|
||||
|| max_filesize.is_some()
|
||||
|| total_size.is_some()
|
||||
|| is_clear
|
||||
{
|
||||
bail!("`--preload` directory is required for this configuration!")
|
||||
}
|
||||
None
|
||||
/// * creates new directory if not exists
|
||||
pub fn output_folder(&self, info_hash: &str) -> Result<PathBuf> {
|
||||
if !is_info_hash(info_hash) {
|
||||
bail!("Invalid info-hash `{info_hash}`")
|
||||
}
|
||||
})
|
||||
let mut p = PathBuf::from(&self.root);
|
||||
p.push(info_hash);
|
||||
if p.is_file() {
|
||||
bail!("Output directory for info-hash `{info_hash}` is file")
|
||||
}
|
||||
if !p.exists() {
|
||||
fs::create_dir(&p)?
|
||||
}
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
pub fn root(&self) -> &PathBuf {
|
||||
&self.root
|
||||
}
|
||||
|
||||
pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
|
||||
Ok(fs::exists(self.torrent(info_hash)?)?)
|
||||
}
|
||||
|
||||
fn torrent(&self, info_hash: &str) -> Result<PathBuf> {
|
||||
if !is_info_hash(info_hash) {
|
||||
bail!("Invalid info-hash `{info_hash}`")
|
||||
}
|
||||
let mut p = PathBuf::from(&self.root);
|
||||
p.push(format!("{info_hash}.torrent"));
|
||||
Ok(p)
|
||||
}
|
||||
}
|
||||
|
||||
fn is_info_hash(value: &str) -> bool {
|
||||
value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit())
|
||||
}
|
||||
|
|
|
|||
142
src/rss.rs
142
src/rss.rs
|
|
@ -1,142 +0,0 @@
|
|||
use anyhow::{Result, bail};
|
||||
use std::{collections::HashSet, fs::File, io::Write, path::PathBuf, str::FromStr};
|
||||
use url::Url;
|
||||
|
||||
/// Export crawl index to the RSS file
|
||||
pub struct Rss {
|
||||
/// Resulting (public) file in the XML format
|
||||
file: File,
|
||||
/// Shared directory for the feed `file` and its `tmp` buffer file
|
||||
target: PathBuf,
|
||||
/// Creates temporary file to exclude feed format damage on update
|
||||
tmp: PathBuf,
|
||||
/// Trackers source for every item in channel
|
||||
trackers: Option<HashSet<Url>>,
|
||||
}
|
||||
|
||||
impl Rss {
|
||||
/// Create writable file for given `filepath`
|
||||
pub fn new(
|
||||
filepath: &str,
|
||||
title: &str,
|
||||
link: &Option<String>,
|
||||
description: &Option<String>,
|
||||
trackers: Option<HashSet<Url>>,
|
||||
) -> Result<Self> {
|
||||
// prevent from reading of the incomplete file
|
||||
let tmp = PathBuf::from_str(&format!("{filepath}.tmp"))?;
|
||||
|
||||
// init public destination
|
||||
let target = PathBuf::from_str(filepath)?;
|
||||
if target.is_dir() {
|
||||
bail!("RSS path `{}` is directory", target.to_string_lossy())
|
||||
}
|
||||
// init temporary file to write
|
||||
let mut file = File::create(&tmp)?;
|
||||
|
||||
file.write_all(
|
||||
b"<?xml version=\"1.0\" encoding=\"UTF-8\"?><rss version=\"2.0\"><channel>",
|
||||
)?;
|
||||
|
||||
let t = chrono::Utc::now().to_rfc2822();
|
||||
file.write_all(b"<pubDate>")?;
|
||||
file.write_all(t.as_bytes())?;
|
||||
file.write_all(b"</pubDate>")?;
|
||||
file.write_all(b"<lastBuildDate>")?;
|
||||
file.write_all(t.as_bytes())?;
|
||||
file.write_all(b"</lastBuildDate>")?;
|
||||
|
||||
file.write_all(b"<title>")?;
|
||||
file.write_all(escape(title).as_bytes())?;
|
||||
file.write_all(b"</title>")?;
|
||||
|
||||
if let Some(s) = description {
|
||||
file.write_all(b"<description>")?;
|
||||
file.write_all(escape(s).as_bytes())?;
|
||||
file.write_all(b"</description>")?
|
||||
}
|
||||
|
||||
if let Some(s) = link {
|
||||
file.write_all(b"<link>")?;
|
||||
file.write_all(escape(s).as_bytes())?;
|
||||
file.write_all(b"</link>")?
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
file,
|
||||
target,
|
||||
trackers,
|
||||
tmp,
|
||||
})
|
||||
}
|
||||
|
||||
/// Append `item` to the feed `channel`
|
||||
pub fn push(
|
||||
&mut self,
|
||||
infohash: &str,
|
||||
title: &str,
|
||||
description: Option<String>,
|
||||
pub_date: Option<&str>,
|
||||
) -> Result<()> {
|
||||
self.file.write_all(
|
||||
format!(
|
||||
"<item><guid>{infohash}</guid><title>{}</title><link>{}</link>",
|
||||
escape(title),
|
||||
escape(&crate::magnet(infohash, self.trackers.as_ref()))
|
||||
)
|
||||
.as_bytes(),
|
||||
)?;
|
||||
if let Some(s) = description {
|
||||
self.file.write_all(b"<description>")?;
|
||||
self.file.write_all(escape(&s).as_bytes())?;
|
||||
self.file.write_all(b"</description>")?
|
||||
}
|
||||
if let Some(s) = pub_date {
|
||||
self.file.write_all(b"<pubDate>")?;
|
||||
self.file.write_all(escape(s).as_bytes())?;
|
||||
self.file.write_all(b"</pubDate>")?
|
||||
}
|
||||
self.file.write_all(b"</item>")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write final bytes, replace public file with temporary one
|
||||
pub fn commit(mut self) -> Result<()> {
|
||||
self.file.write_all(b"</channel></rss>")?;
|
||||
std::fs::rename(self.tmp, self.target)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn item_description(
|
||||
size: Option<u64>,
|
||||
list: Option<&Vec<(Option<String>, u64)>>,
|
||||
) -> Option<String> {
|
||||
use crate::format::Format;
|
||||
if size.is_none() && list.is_none() {
|
||||
return None;
|
||||
}
|
||||
let mut b = Vec::with_capacity(list.map(|l| l.len()).unwrap_or_default() + 1);
|
||||
if let Some(s) = size {
|
||||
b.push(s.bytes())
|
||||
}
|
||||
if let Some(l) = list {
|
||||
for (path, size) in l {
|
||||
b.push(format!(
|
||||
"{} ({})",
|
||||
path.as_deref().unwrap_or("?"), // @TODO invalid encoding
|
||||
size.bytes()
|
||||
))
|
||||
}
|
||||
}
|
||||
Some(b.join("\n"))
|
||||
}
|
||||
|
||||
fn escape(subject: &str) -> String {
|
||||
subject
|
||||
.replace('&', "&")
|
||||
.replace('<', "<")
|
||||
.replace('>', ">")
|
||||
.replace('"', """)
|
||||
.replace("'", "'")
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
use anyhow::Result;
|
||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
||||
|
||||
pub struct Torrent {
|
||||
storage: PathBuf,
|
||||
}
|
||||
|
||||
impl Torrent {
|
||||
pub fn init(path: &str) -> Result<Self> {
|
||||
Ok(Self {
|
||||
storage: PathBuf::from_str(path)?.canonicalize()?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn persist(&self, infohash: &str, data: &[u8]) -> Result<Option<PathBuf>> {
|
||||
Ok(if self.path(infohash).exists() {
|
||||
None
|
||||
} else {
|
||||
let p = self.path(infohash);
|
||||
let mut f = fs::File::create(&p)?;
|
||||
f.write_all(data)?;
|
||||
Some(p)
|
||||
})
|
||||
}
|
||||
|
||||
fn path(&self, infohash: &str) -> PathBuf {
|
||||
let mut p = PathBuf::new();
|
||||
p.push(&self.storage);
|
||||
p.push(format!("{infohash}.torrent"));
|
||||
p
|
||||
}
|
||||
}
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
use std::{collections::HashSet, str::FromStr};
|
||||
use url::Url;
|
||||
|
||||
pub struct Trackers(HashSet<Url>);
|
||||
|
||||
impl Trackers {
|
||||
pub fn init(trackers: &Vec<String>) -> anyhow::Result<Self> {
|
||||
let mut t = HashSet::with_capacity(trackers.len());
|
||||
for tracker in trackers {
|
||||
t.insert(Url::from_str(tracker)?);
|
||||
}
|
||||
Ok(Self(t))
|
||||
}
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
pub fn list(&self) -> &HashSet<Url> {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue