|
/// 256-entry lookup table of forbidden domain code points, matching the C++ implementation.
///
/// The table is indexed by the code point value (`0x00..=0xFF`).
/// 1 = forbidden, 0 = allowed. Every entry from `0x80` upward is marked forbidden.
static IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE: [u8; 256] = [
    // 0x00..=0x1F: all forbidden
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x20..=0x3F: space # % / : < > ?
    1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
    // 0x40..=0x5F: @ [ \ ] ^
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x60..=0x7F: | and 0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
    // 0x80..=0xFF: all forbidden
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
];

/// Returns `true` when `cp` is a forbidden domain code point.
///
/// Values `0x00..=0xFF` are answered by [`IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE`].
/// Any value past the end of the table is reported as forbidden: the table
/// already forbids every entry from `0x80` upward, so larger code points are
/// treated the same way instead of indexing out of bounds and panicking.
#[inline]
fn is_forbidden_domain_char(cp: u32) -> bool {
    // Checked lookup: `cp` is an arbitrary `u32`, but the table has only 256 entries.
    IS_FORBIDDEN_DOMAIN_CODE_POINT_TABLE
        .get(cp as usize)
        .map_or(true, |&flag| flag != 0)
}
16 | 18 |
|
17 | 19 | pub fn valid_name_code_point(cp: u32) -> bool { |
@@ -129,7 +131,42 @@ pub fn is_label_valid(label: &str) -> bool { |
129 | 131 | } |
130 | 132 |
|
131 | 133 | if let Some(stripped) = label.strip_prefix("xn--") { |
132 | | - return crate::punycode::verify_punycode(stripped); |
| 134 | + // 1. Verify punycode is valid and decode it |
| 135 | + let decoded = crate::punycode::punycode_to_utf32(stripped); |
| 136 | + if decoded.is_none() { |
| 137 | + return false; |
| 138 | + } |
| 139 | + let decoded = decoded.unwrap(); |
| 140 | + |
| 141 | + // 2. If the decoded label is all-ASCII, it should not have been encoded as punycode |
| 142 | + if decoded.iter().all(|&cp| cp < 0x80) { |
| 143 | + return false; |
| 144 | + } |
| 145 | + |
| 146 | + // 3. Convert decoded UTF-32 to UTF-8 string for mapping and normalization |
| 147 | + let decoded_utf8_bytes = crate::unicode::utf32_to_utf8(&decoded); |
| 148 | + let decoded_utf8 = String::from_utf8_lossy(&decoded_utf8_bytes); |
| 149 | + let mapped = crate::mapping::map(&decoded_utf8); |
| 150 | + let normalized = crate::normalization::normalize(&mapped); |
| 151 | + |
| 152 | + // 4. Convert normalized string back to UTF-32 for comparison |
| 153 | + let normalized_utf32 = crate::unicode::utf8_to_utf32(normalized.as_bytes()); |
| 154 | + |
| 155 | + // 5. Ensure the decoded label is unchanged after mapping and normalization |
| 156 | + if normalized_utf32 != decoded { |
| 157 | + return false; |
| 158 | + } |
| 159 | + |
| 160 | + // 6. The label must not be empty and must pass valid_name_code_point for all code points |
| 161 | + if normalized_utf32.is_empty() { |
| 162 | + return false; |
| 163 | + } |
| 164 | + for &cp in &normalized_utf32 { |
| 165 | + if !valid_name_code_point(cp) { |
| 166 | + return false; |
| 167 | + } |
| 168 | + } |
| 169 | + return true; |
133 | 170 | } |
134 | 171 |
|
135 | 172 | for c in label.chars() { |
|
0 commit comments