Skip to content

Commit 88b72cf

Browse files
committed
update validation
1 parent 7c00fc1 commit 88b72cf

File tree

2 files changed

+50
-12
lines changed

2 files changed

+50
-12
lines changed

src/validation.rs

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
1-
const FORBIDDEN_SPECIFIC_CHARS: &[u32] = &[
2-
0x0020, 0x0022, 0x0023, 0x0025, 0x002F, 0x003A, 0x003C, 0x003E, 0x003F, 0x0040, 0x005B, 0x005C,
3-
0x005D, 0x005E, 0x007C,
1+
/// 256-byte lookup table for forbidden ASCII code points, matching C++ implementation.
/// 1 = forbidden, 0 = allowed.
///
/// The table is indexed by code point value. Each row below covers 32
/// consecutive values; every entry from 0x80 through 0xFF is marked forbidden.
static IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE: [u8; 256] = [
    // 0x00..=0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x20..=0x3F
    1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
    // 0x40..=0x5F
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x60..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
    // 0x80..=0x9F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0xA0..=0xBF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0xC0..=0xDF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0xE0..=0xFF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
];

/// Returns `true` when `cp` is flagged as forbidden in
/// [`IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE`].
///
/// `cp` is a full `u32` code point while the table only has 256 entries, so
/// the lookup is bounds-checked: any value of 0x100 or above is outside the
/// table and is reported as not forbidden instead of panicking on an
/// out-of-range index.
#[inline]
fn is_forbidden_domain_char(cp: u32) -> bool {
    let entry = usize::try_from(cp)
        .ok()
        .and_then(|index| IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE.get(index));
    match entry {
        Some(&flag) => flag != 0,
        // Outside the 256-entry table: nothing is marked forbidden up there.
        None => false,
    }
}
1618

1719
pub fn valid_name_code_point(cp: u32) -> bool {
@@ -129,7 +131,42 @@ pub fn is_label_valid(label: &str) -> bool {
129131
}
130132

131133
if let Some(stripped) = label.strip_prefix("xn--") {
132-
return crate::punycode::verify_punycode(stripped);
134+
// 1. Verify punycode is valid and decode it
135+
let decoded = crate::punycode::punycode_to_utf32(stripped);
136+
if decoded.is_none() {
137+
return false;
138+
}
139+
let decoded = decoded.unwrap();
140+
141+
// 2. If the decoded is all-ASCII, it should not have been encoded as punycode
142+
if decoded.iter().all(|&cp| cp < 0x80) {
143+
return false;
144+
}
145+
146+
// 3. Convert decoded UTF-32 to UTF-8 string for mapping and normalization
147+
let decoded_utf8_bytes = crate::unicode::utf32_to_utf8(&decoded);
148+
let decoded_utf8 = String::from_utf8_lossy(&decoded_utf8_bytes);
149+
let mapped = crate::mapping::map(&decoded_utf8);
150+
let normalized = crate::normalization::normalize(&mapped);
151+
152+
// 4. Convert normalized string back to UTF-32 for comparison
153+
let normalized_utf32 = crate::unicode::utf8_to_utf32(normalized.as_bytes());
154+
155+
// 5. Ensure the decoded label is unchanged after mapping and normalization
156+
if normalized_utf32 != decoded {
157+
return false;
158+
}
159+
160+
// 6. The label must not be empty and must pass valid_name_code_point for all code points
161+
if normalized_utf32.is_empty() {
162+
return false;
163+
}
164+
for &cp in &normalized_utf32 {
165+
if !valid_name_code_point(cp) {
166+
return false;
167+
}
168+
}
169+
return true;
133170
}
134171

135172
for c in label.chars() {

tests/punycode_tests.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ fn check_punycode_roundtrip(utf8_string: &str, puny_string: &str) {
4141
#[test]
4242
fn test_punycode_fixture_alternating() {
4343
let fixture_path = "tests/fixtures/utf8_punycode_alternating.txt";
44-
let data = fs::read_to_string(fixture_path).expect("Failed to read utf8_punycode_alternating.txt fixture");
44+
let data = fs::read_to_string(fixture_path)
45+
.expect("Failed to read utf8_punycode_alternating.txt fixture");
4546

4647
for (i, line) in data.lines().enumerate() {
4748
// Skip comments and empty lines

0 commit comments

Comments
 (0)