Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions crates/s3s-e2e/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub fn register(tcx: &mut TestContext) {
case!(tcx, Basic, Essential, test_head_operations);
case!(tcx, Basic, Put, test_put_object_tiny);
case!(tcx, Basic, Put, test_put_object_with_metadata);
case!(tcx, Basic, Put, test_put_object_with_utf8_metadata);
case!(tcx, Basic, Put, test_put_object_larger);
case!(tcx, Basic, Put, test_put_object_with_checksum_algorithm);
case!(tcx, Basic, Put, test_put_object_with_content_checksums);
Expand Down Expand Up @@ -359,6 +360,48 @@ impl Put {
Ok(())
}

/// Checks that non-ASCII (UTF-8) user metadata values survive a put/head round trip.
///
/// For every sample below, one object named `file-with-utf8-metadata-<label>` is
/// uploaded with a single metadata entry (`<label>` -> sample text). The entry is
/// then read back through `head_object` and compared with the value that was sent.
///
/// # Errors
///
/// Propagates any error returned by the `put_object` or `head_object` requests.
///
/// # Panics
///
/// Panics if the `head_object` response carries no metadata, if the expected
/// metadata key is absent, or if the returned value differs from the sent value.
async fn test_put_object_with_utf8_metadata(self: Arc<Self>) -> Result {
    let s3 = &self.s3;
    let bucket = self.bucket.as_str();
    let key = "file-with-utf8-metadata";

    let content = "object with UTF-8 metadata";

    // Test various UTF-8 characters in metadata values.
    // A fixed-size array is sufficient; the list is a compile-time literal.
    let test_cases = [
        ("chinese", "你好世界"),
        ("japanese", "こんにちは世界"),
        ("korean", "안녕하세요"),
        ("emoji", "Hello 👋 World 🌍"),
        ("special", "Café ñoño"),
        ("mixed", "Test UTF-8: 你好 🎉 Café"),
    ];

    for (metadata_key, metadata_value) in test_cases {
        // Build the object key once so the put and head requests always agree.
        let object_key = format!("{key}-{metadata_key}");

        s3.put_object()
            .bucket(bucket)
            .key(object_key.as_str())
            .body(ByteStream::from_static(content.as_bytes()))
            .metadata(metadata_key, metadata_value)
            .send()
            .await?;

        // Check metadata using head_object
        let head_resp = s3
            .head_object()
            .bucket(bucket)
            .key(object_key)
            .send()
            .await?;

        let metadata = head_resp
            .metadata()
            .expect("head_object response should include user metadata");
        let value = metadata
            .get(metadata_key)
            .unwrap_or_else(|| panic!("metadata key {metadata_key:?} missing from head_object response"));
        assert_eq!(value, metadata_value, "UTF-8 metadata mismatch for key: {metadata_key}");
    }

    Ok(())
}

async fn test_put_object_larger(self: Arc<Self>) -> Result {
let s3 = &self.s3;
let bucket = self.bucket.as_str();
Expand Down
22 changes: 20 additions & 2 deletions crates/s3s/src/http/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,26 @@ pub fn parse_opt_metadata(req: &Request) -> S3Result<Option<Metadata>> {
let val = iter.next().unwrap();
let None = iter.next() else { return Err(duplicate_header(name)) };

let val = val.to_str().map_err(|err| invalid_header(err, name, val))?;
metadata.insert(key.into(), val.into());
// First try to decode as ASCII using to_str() for backwards compatibility
let val_str = match val.to_str() {
Ok(s) => s.to_owned(),
Err(_) => {
// If that fails, decode bytes as UTF-8 directly for UTF-8 metadata support
String::from_utf8_simd(val.as_bytes().into())
.map_err(|_| invalid_request!("metadata value is not valid UTF-8: {}", name.as_str()))?
}
};

// Try to percent-decode the value if it contains percent-encoding
let decoded = if val_str.contains('%') {
urlencoding::decode(&val_str)
.map_err(|_| invalid_request!("metadata value has invalid percent-encoding: {}", name.as_str()))?
.into_owned()
} else {
val_str
};

metadata.insert(key.into(), decoded);
}
if metadata.is_empty() {
return Ok(None);
Expand Down
24 changes: 21 additions & 3 deletions crates/s3s/src/http/ordered_headers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use hyper::header::ToStrError;

use crate::utils::stable_sort_by_first;

use std::borrow::Cow;

/// Immutable http header container
#[derive(Debug, Default)]
pub struct OrderedHeaders<'a> {
Expand Down Expand Up @@ -38,14 +40,29 @@ impl<'a> OrderedHeaders<'a> {
let mut headers: Vec<(&'a str, &'a str)> = Vec::with_capacity(map.len());

for (name, value) in map {
headers.push((name.as_str(), value.to_str()?));
// First try to convert to ASCII str
let value_str = match value.to_str() {
Ok(s) => s,
Err(e) => {
// If that fails, try UTF-8 decoding for metadata headers
if name.as_str().starts_with("x-amz-meta-") {
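// For metadata headers, decode the raw bytes as UTF-8.
// NOTE(review): `Box::leak` never frees its allocation, so every request carrying a
// non-ASCII metadata value permanently grows process memory; this is not request-scoped.
// `std::str::from_utf8(value.as_bytes())` appears to already borrow from the header map
// for `'a`, so the borrowed `&str` could likely be pushed directly without copying or
// leaking -- confirm and remove the leak.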
let utf8_str = std::str::from_utf8(value.as_bytes()).map_err(|_| e)?;
Box::leak(utf8_str.to_owned().into_boxed_str())
} else {
return Err(e);
}
}
};
headers.push((name.as_str(), value_str));
}
stable_sort_by_first(&mut headers);

Ok(Self { headers })
}

fn get_all_pairs(&self, name: &str) -> impl Iterator<Item = (&'a str, &'a str)> + '_ + use<'a, '_> {
fn get_all_pairs(&self, name: &str) -> impl Iterator<Item = (&'a str, &'a str)> + '_ {
let slice = self.headers.as_slice();

let lower_bound = slice.partition_point(|x| x.0 < name);
Expand All @@ -55,7 +72,8 @@ impl<'a> OrderedHeaders<'a> {
}

pub fn get_all(&self, name: impl AsRef<str>) -> impl Iterator<Item = &'a str> + '_ {
self.get_all_pairs(name.as_ref()).map(|x| x.1)
let name_str = name.as_ref();
self.get_all_pairs(name_str).map(|x| x.1)
}

fn get_unique_pair(&self, name: &'_ str) -> Option<(&'a str, &'a str)> {
Expand Down
5 changes: 4 additions & 1 deletion crates/s3s/src/http/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ pub fn add_opt_metadata(res: &mut Response, metadata: Option<Metadata>) -> S3Res
for (key, val) in map {
write!(&mut buf, "x-amz-meta-{key}").unwrap();
let name = HeaderName::from_bytes(buf.as_bytes()).map_err(S3Error::internal_error)?;
let value = HeaderValue::try_from(val).map_err(S3Error::internal_error)?;

// Percent-encode the value to ensure it's ASCII-safe for HTTP headers
let encoded_val = urlencoding::encode(&val);
let value = HeaderValue::try_from(encoded_val.as_ref()).map_err(S3Error::internal_error)?;
res.headers.insert(name, value);
buf.clear();
}
Expand Down