http: quit if any worker thread quits

This commit is contained in:
Joakim Frostegård 2024-02-03 22:34:42 +01:00
parent 4ca73630c4
commit d7e06468c3
8 changed files with 151 additions and 145 deletions

View file

@ -61,6 +61,7 @@
#### Fixed #### Fixed
* Fix bug where clean up after closing connections wasn't always done * Fix bug where clean up after closing connections wasn't always done
* Quit whole application if any worker thread quits
### aquatic_ws ### aquatic_ws

1
Cargo.lock generated
View file

@ -200,7 +200,6 @@ dependencies = [
"log", "log",
"memchr", "memchr",
"metrics", "metrics",
"metrics-exporter-prometheus",
"mimalloc", "mimalloc",
"once_cell", "once_cell",
"privdrop", "privdrop",

View file

@ -1,3 +1,4 @@
use std::fmt::Display;
use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc; use std::sync::Arc;
@ -198,3 +199,25 @@ pub fn spawn_prometheus_endpoint(
Ok(handle) Ok(handle)
} }
/// Identifies a worker so that it can be named in log and error messages.
///
/// The `Swarm` and `Socket` variants carry the worker's zero-based index.
/// The [`Display`] implementation renders that index one-based, so
/// `WorkerType::Socket(0)` is shown as `Socket worker 1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkerType {
    /// Swarm worker with the given zero-based index.
    Swarm(usize),
    /// Socket worker with the given zero-based index.
    Socket(usize),
    /// Statistics worker.
    Statistics,
    /// Signals worker.
    Signals,
    /// Prometheus worker; only present when the `prometheus` feature is enabled.
    #[cfg(feature = "prometheus")]
    Prometheus,
}

impl Display for WorkerType {
    /// Writes a human-readable worker name, e.g. `Swarm worker 3`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Indices are stored zero-based but presented one-based.
            Self::Swarm(index) => write!(f, "Swarm worker {}", index + 1),
            Self::Socket(index) => write!(f, "Socket worker {}", index + 1),
            Self::Statistics => f.write_str("Statistics worker"),
            Self::Signals => f.write_str("Signals worker"),
            #[cfg(feature = "prometheus")]
            Self::Prometheus => f.write_str("Prometheus worker"),
        }
    }
}

View file

@ -19,11 +19,11 @@ name = "aquatic_http"
[features] [features]
default = ["prometheus"] default = ["prometheus"]
prometheus = ["metrics", "metrics-exporter-prometheus"] prometheus = ["aquatic_common/prometheus", "metrics"]
metrics = ["dep:metrics"] metrics = ["dep:metrics"]
[dependencies] [dependencies]
aquatic_common = { workspace = true, features = ["rustls", "glommio"] } aquatic_common = { workspace = true, features = ["rustls"] }
aquatic_http_protocol.workspace = true aquatic_http_protocol.workspace = true
aquatic_toml_config.workspace = true aquatic_toml_config.workspace = true
@ -40,8 +40,6 @@ httparse = "1"
itoa = "1" itoa = "1"
libc = "0.2" libc = "0.2"
log = "0.4" log = "0.4"
metrics = { version = "0.22", optional = true }
metrics-exporter-prometheus = { version = "0.13", optional = true, default-features = false, features = ["http-listener"] }
mimalloc = { version = "0.1", default-features = false } mimalloc = { version = "0.1", default-features = false }
memchr = "2" memchr = "2"
privdrop = "0.5" privdrop = "0.5"
@ -54,6 +52,9 @@ slotmap = "1"
socket2 = { version = "0.5", features = ["all"] } socket2 = { version = "0.5", features = ["all"] }
thiserror = "1" thiserror = "1"
# metrics feature
metrics = { version = "0.22", optional = true }
[dev-dependencies] [dev-dependencies]
quickcheck = "1" quickcheck = "1"
quickcheck_macros = "1" quickcheck_macros = "1"

View file

@ -1,22 +1,17 @@
use anyhow::Context; use anyhow::Context;
use aquatic_common::{ use aquatic_common::{
access_list::update_access_list, access_list::update_access_list, privileges::PrivilegeDropper,
cpu_pinning::{ rustls_config::create_rustls_config, ServerStartInstant, WorkerType,
glommio::{get_worker_placement, set_affinity_for_util_worker},
WorkerIndex,
},
privileges::PrivilegeDropper,
rustls_config::create_rustls_config,
PanicSentinelWatcher, ServerStartInstant,
}; };
use arc_swap::ArcSwap; use arc_swap::ArcSwap;
use common::State; use common::State;
use glommio::{channels::channel_mesh::MeshBuilder, prelude::*}; use glommio::{channels::channel_mesh::MeshBuilder, prelude::*};
use signal_hook::{ use signal_hook::{consts::SIGUSR1, iterator::Signals};
consts::{SIGTERM, SIGUSR1}, use std::{
iterator::Signals, sync::Arc,
thread::{sleep, Builder, JoinHandle},
time::Duration,
}; };
use std::sync::Arc;
use crate::config::Config; use crate::config::Config;
@ -30,32 +25,16 @@ pub const APP_VERSION: &str = env!("CARGO_PKG_VERSION");
const SHARED_CHANNEL_SIZE: usize = 1024; const SHARED_CHANNEL_SIZE: usize = 1024;
pub fn run(config: Config) -> ::anyhow::Result<()> { pub fn run(config: Config) -> ::anyhow::Result<()> {
let mut signals = Signals::new([SIGUSR1, SIGTERM])?; let mut signals = Signals::new([SIGUSR1])?;
#[cfg(feature = "prometheus")]
if config.metrics.run_prometheus_endpoint {
use metrics_exporter_prometheus::PrometheusBuilder;
PrometheusBuilder::new()
.with_http_listener(config.metrics.prometheus_endpoint_address)
.install()
.with_context(|| {
format!(
"Install prometheus endpoint on {}",
config.metrics.prometheus_endpoint_address
)
})?;
}
let state = State::default(); let state = State::default();
update_access_list(&config.access_list, &state.access_list)?; update_access_list(&config.access_list, &state.access_list)?;
let num_peers = config.socket_workers + config.swarm_workers; let request_mesh_builder = MeshBuilder::partial(
config.socket_workers + config.swarm_workers,
let request_mesh_builder = MeshBuilder::partial(num_peers, SHARED_CHANNEL_SIZE); SHARED_CHANNEL_SIZE,
);
let (sentinel_watcher, sentinel) = PanicSentinelWatcher::create_with_sentinel();
let priv_dropper = PrivilegeDropper::new(config.privileges.clone(), config.socket_workers); let priv_dropper = PrivilegeDropper::new(config.privileges.clone(), config.socket_workers);
let opt_tls_config = if config.network.enable_tls { let opt_tls_config = if config.network.enable_tls {
@ -69,28 +48,22 @@ pub fn run(config: Config) -> ::anyhow::Result<()> {
let server_start_instant = ServerStartInstant::new(); let server_start_instant = ServerStartInstant::new();
let mut executors = Vec::new(); let mut join_handles = Vec::new();
for i in 0..(config.socket_workers) { for i in 0..(config.socket_workers) {
let sentinel = sentinel.clone();
let config = config.clone(); let config = config.clone();
let state = state.clone(); let state = state.clone();
let opt_tls_config = opt_tls_config.clone(); let opt_tls_config = opt_tls_config.clone();
let request_mesh_builder = request_mesh_builder.clone(); let request_mesh_builder = request_mesh_builder.clone();
let priv_dropper = priv_dropper.clone(); let priv_dropper = priv_dropper.clone();
let placement = get_worker_placement( let handle = Builder::new()
&config.cpu_pinning, .name(format!("socket-{:02}", i + 1))
config.socket_workers, .spawn(move || {
config.swarm_workers, LocalExecutorBuilder::default()
WorkerIndex::SocketWorker(i), .make()
)?; .map_err(|err| anyhow::anyhow!("Spawning executor failed: {:#}", err))?
let builder = LocalExecutorBuilder::new(placement).name(&format!("socket-{:02}", i + 1)); .run(workers::socket::run_socket_worker(
let executor = builder
.spawn(move || async move {
workers::socket::run_socket_worker(
sentinel,
config, config,
state, state,
opt_tls_config, opt_tls_config,
@ -98,53 +71,54 @@ pub fn run(config: Config) -> ::anyhow::Result<()> {
priv_dropper, priv_dropper,
server_start_instant, server_start_instant,
i, i,
) ))
.await
}) })
.map_err(|err| anyhow::anyhow!("Spawning executor failed: {:#}", err))?; .context("spawn socket worker")?;
executors.push(executor); join_handles.push((WorkerType::Socket(i), handle));
} }
for i in 0..(config.swarm_workers) { for i in 0..(config.swarm_workers) {
let sentinel = sentinel.clone();
let config = config.clone(); let config = config.clone();
let state = state.clone(); let state = state.clone();
let request_mesh_builder = request_mesh_builder.clone(); let request_mesh_builder = request_mesh_builder.clone();
let placement = get_worker_placement( let handle = Builder::new()
&config.cpu_pinning, .name(format!("swarm-{:02}", i + 1))
config.socket_workers, .spawn(move || {
config.swarm_workers, LocalExecutorBuilder::default()
WorkerIndex::SwarmWorker(i), .make()
)?; .map_err(|err| anyhow::anyhow!("Spawning executor failed: {:#}", err))?
let builder = LocalExecutorBuilder::new(placement).name(&format!("swarm-{:02}", i + 1)); .run(workers::swarm::run_swarm_worker(
let executor = builder
.spawn(move || async move {
workers::swarm::run_swarm_worker(
sentinel,
config, config,
state, state,
request_mesh_builder, request_mesh_builder,
server_start_instant, server_start_instant,
i, i,
) ))
.await
}) })
.map_err(|err| anyhow::anyhow!("Spawning executor failed: {:#}", err))?; .context("spawn swarm worker")?;
executors.push(executor); join_handles.push((WorkerType::Swarm(i), handle));
} }
if config.cpu_pinning.active { #[cfg(feature = "prometheus")]
set_affinity_for_util_worker( if config.metrics.run_prometheus_endpoint {
&config.cpu_pinning, let handle = aquatic_common::spawn_prometheus_endpoint(
config.socket_workers, config.metrics.prometheus_endpoint_address,
config.swarm_workers, Some(Duration::from_secs(
config.cleaning.torrent_cleaning_interval * 2,
)),
)?; )?;
join_handles.push((WorkerType::Prometheus, handle));
} }
// Spawn signal handler thread
{
let handle: JoinHandle<anyhow::Result<()>> = Builder::new()
.name("signals".into())
.spawn(move || {
for signal in &mut signals { for signal in &mut signals {
match signal { match signal {
SIGUSR1 => { SIGUSR1 => {
@ -160,20 +134,42 @@ pub fn run(config: Config) -> ::anyhow::Result<()> {
::log::info!("successfully updated tls config"); ::log::info!("successfully updated tls config");
} }
Err(err) => ::log::error!("could not update tls config: {:#}", err), Err(err) => {
::log::error!("could not update tls config: {:#}", err)
} }
} }
} }
SIGTERM => {
if sentinel_watcher.panic_was_triggered() {
return Err(anyhow::anyhow!("worker thread panicked"));
} else {
return Ok(());
}
} }
_ => unreachable!(), _ => unreachable!(),
} }
} }
Ok(()) Ok(())
})
.context("spawn signal worker")?;
join_handles.push((WorkerType::Signals, handle));
}
loop {
for (i, (_, handle)) in join_handles.iter().enumerate() {
if handle.is_finished() {
let (worker_type, handle) = join_handles.remove(i);
match handle.join() {
Ok(Ok(())) => {
return Err(anyhow::anyhow!("{} stopped", worker_type));
}
Ok(Err(err)) => {
return Err(err.context(format!("{} stopped", worker_type)));
}
Err(_) => {
return Err(anyhow::anyhow!("{} panicked", worker_type));
}
}
}
}
sleep(Duration::from_secs(5));
}
} }

View file

@ -10,7 +10,7 @@ use std::time::Duration;
use anyhow::Context; use anyhow::Context;
use aquatic_common::privileges::PrivilegeDropper; use aquatic_common::privileges::PrivilegeDropper;
use aquatic_common::rustls_config::RustlsConfig; use aquatic_common::rustls_config::RustlsConfig;
use aquatic_common::{CanonicalSocketAddr, PanicSentinel, ServerStartInstant}; use aquatic_common::{CanonicalSocketAddr, ServerStartInstant};
use arc_swap::ArcSwap; use arc_swap::ArcSwap;
use futures_lite::future::race; use futures_lite::future::race;
use futures_lite::StreamExt; use futures_lite::StreamExt;
@ -32,7 +32,6 @@ struct ConnectionHandle {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub async fn run_socket_worker( pub async fn run_socket_worker(
_sentinel: PanicSentinel,
config: Config, config: Config,
state: State, state: State,
opt_tls_config: Option<Arc<ArcSwap<RustlsConfig>>>, opt_tls_config: Option<Arc<ArcSwap<RustlsConfig>>>,
@ -40,13 +39,16 @@ pub async fn run_socket_worker(
priv_dropper: PrivilegeDropper, priv_dropper: PrivilegeDropper,
server_start_instant: ServerStartInstant, server_start_instant: ServerStartInstant,
worker_index: usize, worker_index: usize,
) { ) -> anyhow::Result<()> {
let config = Rc::new(config); let config = Rc::new(config);
let access_list = state.access_list; let access_list = state.access_list;
let listener = create_tcp_listener(&config, priv_dropper).expect("create tcp listener"); let listener = create_tcp_listener(&config, priv_dropper).context("create tcp listener")?;
let (request_senders, _) = request_mesh_builder.join(Role::Producer).await.unwrap(); let (request_senders, _) = request_mesh_builder
.join(Role::Producer)
.await
.map_err(|err| anyhow::anyhow!("join request mesh: {:#}", err))?;
let request_senders = Rc::new(request_senders); let request_senders = Rc::new(request_senders);
let connection_handles = Rc::new(RefCell::new(HopSlotMap::with_key())); let connection_handles = Rc::new(RefCell::new(HopSlotMap::with_key()));
@ -145,6 +147,8 @@ pub async fn run_socket_worker(
} }
} }
} }
Ok(())
} }
async fn clean_connections( async fn clean_connections(

View file

@ -11,7 +11,7 @@ use glommio::{enclose, prelude::*};
use rand::prelude::SmallRng; use rand::prelude::SmallRng;
use rand::SeedableRng; use rand::SeedableRng;
use aquatic_common::{PanicSentinel, ServerStartInstant, ValidUntil}; use aquatic_common::{ServerStartInstant, ValidUntil};
use crate::common::*; use crate::common::*;
use crate::config::Config; use crate::config::Config;
@ -19,14 +19,16 @@ use crate::config::Config;
use self::storage::TorrentMaps; use self::storage::TorrentMaps;
pub async fn run_swarm_worker( pub async fn run_swarm_worker(
_sentinel: PanicSentinel,
config: Config, config: Config,
state: State, state: State,
request_mesh_builder: MeshBuilder<ChannelRequest, Partial>, request_mesh_builder: MeshBuilder<ChannelRequest, Partial>,
server_start_instant: ServerStartInstant, server_start_instant: ServerStartInstant,
worker_index: usize, worker_index: usize,
) { ) -> anyhow::Result<()> {
let (_, mut request_receivers) = request_mesh_builder.join(Role::Consumer).await.unwrap(); let (_, mut request_receivers) = request_mesh_builder
.join(Role::Consumer)
.await
.map_err(|err| anyhow::anyhow!("join request mesh: {:#}", err))?;
let torrents = Rc::new(RefCell::new(TorrentMaps::new(worker_index))); let torrents = Rc::new(RefCell::new(TorrentMaps::new(worker_index)));
let access_list = state.access_list; let access_list = state.access_list;
@ -82,6 +84,8 @@ pub async fn run_swarm_worker(
for handle in handles { for handle in handles {
handle.await; handle.await;
} }
Ok(())
} }
async fn handle_request_stream<S>( async fn handle_request_stream<S>(

View file

@ -3,11 +3,11 @@ pub mod config;
pub mod workers; pub mod workers;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fmt::Display;
use std::thread::{sleep, Builder, JoinHandle}; use std::thread::{sleep, Builder, JoinHandle};
use std::time::Duration; use std::time::Duration;
use anyhow::Context; use anyhow::Context;
use aquatic_common::WorkerType;
use crossbeam_channel::{bounded, unbounded}; use crossbeam_channel::{bounded, unbounded};
use signal_hook::consts::SIGUSR1; use signal_hook::consts::SIGUSR1;
use signal_hook::iterator::Signals; use signal_hook::iterator::Signals;
@ -233,25 +233,3 @@ pub fn run(config: Config) -> ::anyhow::Result<()> {
sleep(Duration::from_secs(5)); sleep(Duration::from_secs(5));
} }
} }
/// Kind of worker, used to produce a human-readable name via [`Display`].
enum WorkerType {
    /// Swarm worker; the payload is its zero-based index.
    Swarm(usize),
    /// Socket worker; the payload is its zero-based index.
    Socket(usize),
    /// Statistics worker.
    Statistics,
    /// Signals worker.
    Signals,
    /// Prometheus worker (requires the `prometheus` feature).
    #[cfg(feature = "prometheus")]
    Prometheus,
}

impl Display for WorkerType {
    /// Formats the worker name; indexed workers are numbered starting at 1.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::Swarm(idx) => write!(f, "Swarm worker {}", idx + 1),
            Self::Socket(idx) => write!(f, "Socket worker {}", idx + 1),
            Self::Statistics => f.write_str("Statistics worker"),
            Self::Signals => f.write_str("Signals worker"),
            #[cfg(feature = "prometheus")]
            Self::Prometheus => f.write_str("Prometheus worker"),
        }
    }
}