diff --git a/crates/s3s-e2e/src/basic.rs b/crates/s3s-e2e/src/basic.rs
index 0eb24fe8..b0adac82 100644
--- a/crates/s3s-e2e/src/basic.rs
+++ b/crates/s3s-e2e/src/basic.rs
@@ -24,6 +24,7 @@ pub fn register(tcx: &mut TestContext) {
     case!(tcx, Basic, Essential, test_head_operations);
     case!(tcx, Basic, Put, test_put_object_tiny);
     case!(tcx, Basic, Put, test_put_object_with_metadata);
+    case!(tcx, Basic, Put, test_put_object_with_utf8_metadata);
     case!(tcx, Basic, Put, test_put_object_larger);
     case!(tcx, Basic, Put, test_put_object_with_checksum_algorithm);
     case!(tcx, Basic, Put, test_put_object_with_content_checksums);
@@ -359,6 +360,48 @@ impl Put {
         Ok(())
     }
 
+    async fn test_put_object_with_utf8_metadata(self: Arc<Self>) -> Result {
+        let s3 = &self.s3;
+        let bucket = self.bucket.as_str();
+        let key = "file-with-utf8-metadata";
+
+        let content = "object with UTF-8 metadata";
+
+        // Test various UTF-8 characters in metadata values
+        let test_cases = vec![
+            ("chinese", "你好世界"),
+            ("japanese", "こんにちは世界"),
+            ("korean", "안녕하세요"),
+            ("emoji", "Hello 👋 World 🌍"),
+            ("special", "Café ñoño"),
+            ("mixed", "Test UTF-8: 你好 🎉 Café"),
+        ];
+
+        for (metadata_key, metadata_value) in test_cases {
+            s3.put_object()
+                .bucket(bucket)
+                .key(format!("{key}-{metadata_key}"))
+                .body(ByteStream::from_static(content.as_bytes()))
+                .metadata(metadata_key, metadata_value)
+                .send()
+                .await?;
+
+            // Check metadata using head_object
+            let head_resp = s3
+                .head_object()
+                .bucket(bucket)
+                .key(format!("{key}-{metadata_key}"))
+                .send()
+                .await?;
+
+            let metadata = head_resp.metadata().unwrap();
+            let value = metadata.get(metadata_key).unwrap();
+            assert_eq!(value, metadata_value, "UTF-8 metadata mismatch for key: {metadata_key}");
+        }
+
+        Ok(())
+    }
+
     async fn test_put_object_larger(self: Arc<Self>) -> Result {
         let s3 = &self.s3;
         let bucket = self.bucket.as_str();
diff --git a/crates/s3s/src/http/de.rs b/crates/s3s/src/http/de.rs
index f3639746..2dd54dcc 100644
--- a/crates/s3s/src/http/de.rs
+++ b/crates/s3s/src/http/de.rs
@@ -266,8 +266,26 @@ pub fn parse_opt_metadata(req: &Request) -> S3Result<Option<Metadata>> {
         let val = iter.next().unwrap();
         let None = iter.next() else { return Err(duplicate_header(name)) };
 
-        let val = val.to_str().map_err(|err| invalid_header(err, name, val))?;
-        metadata.insert(key.into(), val.into());
+        // First try to decode as ASCII using to_str() for backwards compatibility
+        let val_str = match val.to_str() {
+            Ok(s) => s.to_owned(),
+            Err(_) => {
+                // If that fails, decode bytes as UTF-8 directly for UTF-8 metadata support
+                String::from_utf8_simd(val.as_bytes().into())
+                    .map_err(|_| invalid_request!("metadata value is not valid UTF-8: {}", name.as_str()))?
+            }
+        };
+
+        // Try to percent-decode the value if it contains percent-encoding
+        let decoded = if val_str.contains('%') {
+            urlencoding::decode(&val_str)
+                .map_err(|_| invalid_request!("metadata value has invalid percent-encoding: {}", name.as_str()))?
+                .into_owned()
+        } else {
+            val_str
+        };
+
+        metadata.insert(key.into(), decoded);
     }
     if metadata.is_empty() {
         return Ok(None);
diff --git a/crates/s3s/src/http/ordered_headers.rs b/crates/s3s/src/http/ordered_headers.rs
index 446b2e1a..f9b61efb 100644
--- a/crates/s3s/src/http/ordered_headers.rs
+++ b/crates/s3s/src/http/ordered_headers.rs
@@ -38,14 +38,27 @@ impl<'a> OrderedHeaders<'a> {
         let mut headers: Vec<(&'a str, &'a str)> = Vec::with_capacity(map.len());
 
         for (name, value) in map {
-            headers.push((name.as_str(), value.to_str()?));
+            // First try to convert to ASCII str
+            let value_str = match value.to_str() {
+                Ok(s) => s,
+                Err(e) => {
+                    // If that fails, try UTF-8 decoding for metadata headers
+                    if name.as_str().starts_with("x-amz-meta-") {
+                        // `from_utf8` borrows directly from the header's bytes, so the result
+                        // lives as long as `value` does; no copy (and no leak) is needed.
+                        std::str::from_utf8(value.as_bytes()).map_err(|_| e)?
+                    } else {
+                        return Err(e);
+                    }
+                }
+            };
+            headers.push((name.as_str(), value_str));
         }
         stable_sort_by_first(&mut headers);
 
         Ok(Self { headers })
     }
 
-    fn get_all_pairs(&self, name: &str) -> impl Iterator<Item = (&'a str, &'a str)> + '_ + use<'a, '_> {
+    fn get_all_pairs(&self, name: &str) -> impl Iterator<Item = (&'a str, &'a str)> + '_ {
         let slice = self.headers.as_slice();
 
         let lower_bound = slice.partition_point(|x| x.0 < name);
@@ -55,7 +69,8 @@ impl<'a> OrderedHeaders<'a> {
     }
 
     pub fn get_all(&self, name: impl AsRef<str>) -> impl Iterator<Item = &'a str> + '_ {
-        self.get_all_pairs(name.as_ref()).map(|x| x.1)
+        let name_str = name.as_ref();
+        self.get_all_pairs(name_str).map(|x| x.1)
     }
 
     fn get_unique_pair(&self, name: &'_ str) -> Option<(&'a str, &'a str)> {
diff --git a/crates/s3s/src/http/ser.rs b/crates/s3s/src/http/ser.rs
index c7d6be5f..4a9d1bc2 100644
--- a/crates/s3s/src/http/ser.rs
+++ b/crates/s3s/src/http/ser.rs
@@ -151,7 +151,12 @@ pub fn add_opt_metadata(res: &mut Response, metadata: Option<Metadata>) -> S3Res
         for (key, val) in map {
             write!(&mut buf, "x-amz-meta-{key}").unwrap();
             let name = HeaderName::from_bytes(buf.as_bytes()).map_err(S3Error::internal_error)?;
-            let value = HeaderValue::try_from(val).map_err(S3Error::internal_error)?;
+
+            // Percent-encode the value to ensure it's ASCII-safe for HTTP headers
+            // NOTE(review): `urlencoding::encode` also escapes spaces and other printable
+            // ASCII, so values that used to be sent verbatim change - confirm clients decode.
+            let encoded_val = urlencoding::encode(&val);
+            let value = HeaderValue::try_from(encoded_val.as_ref()).map_err(S3Error::internal_error)?;
             res.headers.insert(name, value);
             buf.clear();
         }