mirror of
https://github.com/YGGverse/aquatic.git
synced 2026-03-31 17:55:36 +00:00
aquatic http protocol: move urlencode fns to utils module, clean up
This commit is contained in:
parent
8fea96bcd2
commit
909f0edce9
2 changed files with 90 additions and 149 deletions
|
|
@ -158,6 +158,12 @@ impl Request {
|
|||
/// values with same key, and serde_qs pulls in lots of dependencies. Both
|
||||
/// would need preprocessing for the binary format used for info_hash and
|
||||
/// peer_id.
|
||||
///
|
||||
/// The info hashes and peer id's that are received are url-encoded byte
|
||||
/// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as
|
||||
/// UTF-8 string, meaning that non-ascii bytes are invalid characters.
|
||||
/// Therefore, these bytes must be converted to their equivalent multi-byte
|
||||
/// UTF-8 encodings.
|
||||
pub fn from_http_get_path(path: &str) -> anyhow::Result<Self> {
|
||||
::log::debug!("request GET path: {}", path);
|
||||
|
||||
|
|
@ -194,12 +200,12 @@ impl Request {
|
|||
|
||||
match key {
|
||||
"info_hash" => {
|
||||
let value = Self::urldecode_20_bytes(value)?;
|
||||
let value = urldecode_20_bytes(value)?;
|
||||
|
||||
info_hashes.push(InfoHash(value));
|
||||
},
|
||||
"peer_id" => {
|
||||
let value = Self::urldecode_20_bytes(value)?;
|
||||
let value = urldecode_20_bytes(value)?;
|
||||
|
||||
opt_peer_id = Some(PeerId(value));
|
||||
},
|
||||
|
|
@ -264,127 +270,6 @@ impl Request {
|
|||
}
|
||||
}
|
||||
|
||||
/// The info hashes and peer id's that are received are url-encoded byte
|
||||
/// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as
|
||||
/// UTF-8 string, meaning that non-ascii bytes are invalid characters.
|
||||
/// Therefore, these bytes must be converted to their equivalent multi-byte
|
||||
/// UTF-8 encodings.
|
||||
fn urldecode(value: &str) -> anyhow::Result<String> {
|
||||
let mut processed = String::new();
|
||||
|
||||
for (i, part) in value.split('%').enumerate(){
|
||||
if i == 0 {
|
||||
processed.push_str(part);
|
||||
} else if part.len() >= 2 {
|
||||
let mut two_first = String::with_capacity(2);
|
||||
|
||||
for (j, c) in part.chars().enumerate(){
|
||||
if j == 0 {
|
||||
two_first.push(c);
|
||||
} else if j == 1 {
|
||||
two_first.push(c);
|
||||
|
||||
let byte = u8::from_str_radix(&two_first, 16)?;
|
||||
|
||||
processed.push(byte as char);
|
||||
} else {
|
||||
processed.push(c);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"url decode: too few characters in '%{}'", part
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
Ok(processed)
|
||||
}
|
||||
|
||||
/// Quite a bit faster than non-memchr version
|
||||
fn urldecode_memchr(value: &str) -> anyhow::Result<SmartString<LazyCompact>> {
|
||||
let mut processed = SmartString::new();
|
||||
|
||||
let bytes = value.as_bytes();
|
||||
let iter = ::memchr::memchr_iter(b'%', bytes);
|
||||
|
||||
let mut str_index_after_hex = 0usize;
|
||||
|
||||
for i in iter {
|
||||
match (bytes.get(i), bytes.get(i + 1), bytes.get(i + 2)){
|
||||
(Some(0..=127), Some(0..=127), Some(0..=127)) => {
|
||||
if i > 0 {
|
||||
processed.push_str(&value[str_index_after_hex..i]);
|
||||
}
|
||||
|
||||
str_index_after_hex = i + 3;
|
||||
|
||||
let hex = &value[i + 1..i + 3];
|
||||
let byte = u8::from_str_radix(&hex, 16)?;
|
||||
|
||||
processed.push(byte as char);
|
||||
},
|
||||
_ => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"invalid urlencoded segment at byte {} in {}", i, value
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(rest_of_str) = value.get(str_index_after_hex..){
|
||||
processed.push_str(rest_of_str);
|
||||
}
|
||||
|
||||
Ok(processed)
|
||||
}
|
||||
|
||||
fn urldecode_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> {
|
||||
let mut out_arr = [0u8; 20];
|
||||
|
||||
let mut chars = value.chars();
|
||||
|
||||
for i in 0..20 {
|
||||
let c = chars.next()
|
||||
.with_context(|| "less than 20 chars")?;
|
||||
|
||||
if c as u32 > 255 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"character not in single byte range: {:#?}",
|
||||
c
|
||||
));
|
||||
}
|
||||
|
||||
if c == '%' {
|
||||
let first = chars.next()
|
||||
.with_context(|| "missing first urldecode char in pair")?;
|
||||
let second = chars.next()
|
||||
.with_context(|| "missing second urldecode char in pair")?;
|
||||
|
||||
let hex = [first as u8, second as u8];
|
||||
|
||||
hex::decode_to_slice(&hex, &mut out_arr[i..i+1]).map_err(|err|
|
||||
anyhow::anyhow!("hex decode error: {:?}", err)
|
||||
)?;
|
||||
} else {
|
||||
if c as u32 > 255 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"character not in single byte range: {:#?}",
|
||||
c
|
||||
));
|
||||
}
|
||||
|
||||
out_arr[i] = c as u8;
|
||||
}
|
||||
}
|
||||
|
||||
if chars.next().is_some(){
|
||||
return Err(anyhow::anyhow!("more than 20 chars"));
|
||||
}
|
||||
|
||||
Ok(out_arr)
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Vec<u8> {
|
||||
match self {
|
||||
Self::Announce(r) => r.as_bytes(),
|
||||
|
|
@ -403,19 +288,6 @@ mod tests {
|
|||
static REFERENCE_INFO_HASH: [u8; 20] = [0x04, 0x0b, b'k', b'V', 0x3f, 0x5c, b'r', 0x14, 0xa6, 0xb7, 0x98, 0xad, b'C', 0xc3, 0xc9, b'.', 0x40, 0x24, 0x00, 0xb9];
|
||||
static REFERENCE_PEER_ID: [u8; 20] = [b'-', b'A', b'B', b'C', b'9', b'4', b'0', b'-', b'5', b'e', b'r', b't', b'6', b'9', b'm', b'u', b'w', b'5', b't', b'8'];
|
||||
|
||||
#[test]
|
||||
fn test_urldecode(){
|
||||
let f = Request::urldecode_memchr;
|
||||
|
||||
assert_eq!(f("").unwrap(), "".to_string());
|
||||
assert_eq!(f("abc").unwrap(), "abc".to_string());
|
||||
assert_eq!(f("%21").unwrap(), "!".to_string());
|
||||
assert_eq!(f("%21%3D").unwrap(), "!=".to_string());
|
||||
assert_eq!(f("abc%21def%3Dghi").unwrap(), "abc!def=ghi".to_string());
|
||||
assert!(f("%").is_err());
|
||||
assert!(f("%å7").is_err());
|
||||
}
|
||||
|
||||
fn get_reference_announce_request() -> Request {
|
||||
Request::Announce(AnnounceRequest {
|
||||
info_hash: InfoHash(REFERENCE_INFO_HASH),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use std::io::Write;
|
||||
|
||||
use anyhow::Context;
|
||||
use serde::Serializer;
|
||||
use smartstring::{SmartString, LazyCompact};
|
||||
|
||||
|
|
@ -19,13 +20,34 @@ pub fn urlencode_20_bytes(input: [u8; 20], output: &mut impl Write){
|
|||
}
|
||||
|
||||
|
||||
/// Not for serde
|
||||
pub fn deserialize_20_bytes(value: SmartString<LazyCompact>) -> anyhow::Result<[u8; 20]> {
|
||||
let mut arr = [0u8; 20];
|
||||
let mut char_iter = value.chars();
|
||||
pub fn urldecode_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> {
|
||||
let mut out_arr = [0u8; 20];
|
||||
|
||||
for a in arr.iter_mut(){
|
||||
if let Some(c) = char_iter.next(){
|
||||
let mut chars = value.chars();
|
||||
|
||||
for i in 0..20 {
|
||||
let c = chars.next()
|
||||
.with_context(|| "less than 20 chars")?;
|
||||
|
||||
if c as u32 > 255 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"character not in single byte range: {:#?}",
|
||||
c
|
||||
));
|
||||
}
|
||||
|
||||
if c == '%' {
|
||||
let first = chars.next()
|
||||
.with_context(|| "missing first urldecode char in pair")?;
|
||||
let second = chars.next()
|
||||
.with_context(|| "missing second urldecode char in pair")?;
|
||||
|
||||
let hex = [first as u8, second as u8];
|
||||
|
||||
hex::decode_to_slice(&hex, &mut out_arr[i..i+1]).map_err(|err|
|
||||
anyhow::anyhow!("hex decode error: {:?}", err)
|
||||
)?;
|
||||
} else {
|
||||
if c as u32 > 255 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"character not in single byte range: {:#?}",
|
||||
|
|
@ -33,17 +55,53 @@ pub fn deserialize_20_bytes(value: SmartString<LazyCompact>) -> anyhow::Result<[
|
|||
));
|
||||
}
|
||||
|
||||
*a = c as u8;
|
||||
} else {
|
||||
return Err(anyhow::anyhow!("less than 20 bytes: {:#?}", value));
|
||||
out_arr[i] = c as u8;
|
||||
}
|
||||
}
|
||||
|
||||
if char_iter.next().is_some(){
|
||||
Err(anyhow::anyhow!("more than 20 bytes: {:#?}", value))
|
||||
} else {
|
||||
Ok(arr)
|
||||
if chars.next().is_some(){
|
||||
return Err(anyhow::anyhow!("more than 20 chars"));
|
||||
}
|
||||
|
||||
Ok(out_arr)
|
||||
}
|
||||
|
||||
|
||||
pub fn urldecode(value: &str) -> anyhow::Result<SmartString<LazyCompact>> {
|
||||
let mut processed = SmartString::new();
|
||||
|
||||
let bytes = value.as_bytes();
|
||||
let iter = ::memchr::memchr_iter(b'%', bytes);
|
||||
|
||||
let mut str_index_after_hex = 0usize;
|
||||
|
||||
for i in iter {
|
||||
match (bytes.get(i), bytes.get(i + 1), bytes.get(i + 2)){
|
||||
(Some(0..=127), Some(0..=127), Some(0..=127)) => {
|
||||
if i > 0 {
|
||||
processed.push_str(&value[str_index_after_hex..i]);
|
||||
}
|
||||
|
||||
str_index_after_hex = i + 3;
|
||||
|
||||
let hex = &value[i + 1..i + 3];
|
||||
let byte = u8::from_str_radix(&hex, 16)?;
|
||||
|
||||
processed.push(byte as char);
|
||||
},
|
||||
_ => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"invalid urlencoded segment at byte {} in {}", i, value
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(rest_of_str) = value.get(str_index_after_hex..){
|
||||
processed.push_str(rest_of_str);
|
||||
}
|
||||
|
||||
Ok(processed)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -117,4 +175,15 @@ mod tests {
|
|||
assert_eq!(chunk, reference);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_urldecode(){
|
||||
assert_eq!(urldecode("").unwrap(), "".to_string());
|
||||
assert_eq!(urldecode("abc").unwrap(), "abc".to_string());
|
||||
assert_eq!(urldecode("%21").unwrap(), "!".to_string());
|
||||
assert_eq!(urldecode("%21%3D").unwrap(), "!=".to_string());
|
||||
assert_eq!(urldecode("abc%21def%3Dghi").unwrap(), "abc!def=ghi".to_string());
|
||||
assert!(urldecode("%").is_err());
|
||||
assert!(urldecode("%å7").is_err());
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue