-
Notifications
You must be signed in to change notification settings - Fork 346
Pass "new" tests #537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Pass "new" tests #537
Changes from all commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
1655a76
Update tests from wpt
nox fa9f044
Fix percent encoding of fragments (closes #491)
nox 412266a
Refactor parse_file to look more like the spec
nox e93f999
Fix a Windows quirk
nox efe9ab9
Properly copy hosts of base file:// URLs when needed
nox 54a158b
Path and file parsing.
o0Ignition0o 0586854
Host parsing rules.
o0Ignition0o 26ccc0d
Hash getter and setter.
o0Ignition0o 7efdc53
Fix scheme setter
o0Ignition0o 736d7bc
removing unused imports.
o0Ignition0o a9ca033
Pleasing the 1.33.0 borrow checker.
o0Ignition0o 8ef4847
Make sure a windows drive letter segment always ends with a slash.
o0Ignition0o aeef54f
trim file paths if needed.
o0Ignition0o 925ec94
Avoid allocation when checking for windows drive letters.
o0Ignition0o 4464840
Comments and nits fixups.
o0Ignition0o File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -456,13 +456,15 @@ impl Url { | |
|
||
if self.slice(self.scheme_end + 1..).starts_with("//") { | ||
// URL with authority | ||
match self.byte_at(self.username_end) { | ||
b':' => { | ||
assert!(self.host_start >= self.username_end + 2); | ||
assert_eq!(self.byte_at(self.host_start - 1), b'@'); | ||
if self.username_end != self.serialization.len() as u32 { | ||
match self.byte_at(self.username_end) { | ||
b':' => { | ||
assert!(self.host_start >= self.username_end + 2); | ||
assert_eq!(self.byte_at(self.host_start - 1), b'@'); | ||
} | ||
b'@' => assert!(self.host_start == self.username_end + 1), | ||
_ => assert_eq!(self.username_end, self.scheme_end + 3), | ||
} | ||
b'@' => assert!(self.host_start == self.username_end + 1), | ||
_ => assert_eq!(self.username_end, self.scheme_end + 3), | ||
} | ||
assert!(self.host_start >= self.username_end); | ||
assert!(self.host_end >= self.host_start); | ||
|
@@ -490,7 +492,10 @@ impl Url { | |
Some(port_str.parse::<u16>().expect("Couldn't parse port?")) | ||
); | ||
} | ||
assert_eq!(self.byte_at(self.path_start), b'/'); | ||
assert!( | ||
self.path_start as usize == self.serialization.len() | ||
|| matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?') | ||
); | ||
} else { | ||
// Anarchist URL (no authority) | ||
assert_eq!(self.username_end, self.scheme_end + 1); | ||
|
@@ -501,11 +506,11 @@ impl Url { | |
assert_eq!(self.path_start, self.scheme_end + 1); | ||
} | ||
if let Some(start) = self.query_start { | ||
assert!(start > self.path_start); | ||
assert!(start >= self.path_start); | ||
assert_eq!(self.byte_at(start), b'?'); | ||
} | ||
if let Some(start) = self.fragment_start { | ||
assert!(start > self.path_start); | ||
assert!(start >= self.path_start); | ||
assert_eq!(self.byte_at(start), b'#'); | ||
} | ||
if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { | ||
|
@@ -685,7 +690,7 @@ impl Url { | |
/// ``` | ||
#[inline] | ||
pub fn cannot_be_a_base(&self) -> bool { | ||
!self.slice(self.path_start..).starts_with('/') | ||
!self.slice(self.scheme_end + 1..).starts_with('/') | ||
} | ||
|
||
/// Return the username for this URL (typically the empty string) | ||
|
@@ -745,7 +750,10 @@ impl Url { | |
pub fn password(&self) -> Option<&str> { | ||
// This ':' is not the one marking a port number since a host can not be empty. | ||
// (Except for file: URLs, which do not have port numbers.) | ||
if self.has_authority() && self.byte_at(self.username_end) == b':' { | ||
if self.has_authority() | ||
&& self.username_end != self.serialization.len() as u32 | ||
&& self.byte_at(self.username_end) == b':' | ||
{ | ||
debug_assert!(self.byte_at(self.host_start - 1) == b'@'); | ||
Some(self.slice(self.username_end + 1..self.host_start - 1)) | ||
} else { | ||
|
@@ -1226,7 +1234,7 @@ impl Url { | |
if let Some(input) = fragment { | ||
self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); | ||
self.serialization.push('#'); | ||
self.mutate(|parser| parser.parse_fragment(parser::Input::new(input))) | ||
self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) | ||
} else { | ||
self.fragment_start = None | ||
} | ||
|
@@ -1284,7 +1292,12 @@ impl Url { | |
let scheme_type = SchemeType::from(self.scheme()); | ||
let scheme_end = self.scheme_end; | ||
self.mutate(|parser| { | ||
parser.parse_query(scheme_type, scheme_end, parser::Input::new(input)) | ||
let vfn = parser.violation_fn; | ||
parser.parse_query( | ||
scheme_type, | ||
scheme_end, | ||
parser::Input::trim_tab_and_newlines(input, vfn), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is the spec for the trimming when setting a query? |
||
) | ||
}); | ||
} | ||
|
||
|
@@ -1625,14 +1638,34 @@ impl Url { | |
if host == "" && SchemeType::from(self.scheme()).is_special() { | ||
return Err(ParseError::EmptyHost); | ||
} | ||
let mut host_substr = host; | ||
// Otherwise, if c is U+003A (:) and the [] flag is unset, then | ||
if !host.starts_with('[') || !host.ends_with(']') { | ||
match host.find(':') { | ||
Some(0) => { | ||
// If buffer is the empty string, validation error, return failure. | ||
return Err(ParseError::InvalidDomainCharacter); | ||
} | ||
// Let host be the result of host parsing buffer | ||
Some(colon_index) => { | ||
host_substr = &host[..colon_index]; | ||
} | ||
None => {} | ||
} | ||
} | ||
if SchemeType::from(self.scheme()).is_special() { | ||
self.set_host_internal(Host::parse(host)?, None) | ||
self.set_host_internal(Host::parse(host_substr)?, None); | ||
} else { | ||
self.set_host_internal(Host::parse_opaque(host)?, None) | ||
self.set_host_internal(Host::parse_opaque(host_substr)?, None); | ||
} | ||
} else if self.has_host() { | ||
if SchemeType::from(self.scheme()).is_special() { | ||
let scheme_type = SchemeType::from(self.scheme()); | ||
if scheme_type.is_special() { | ||
return Err(ParseError::EmptyHost); | ||
} else { | ||
if self.serialization.len() == self.path_start as usize { | ||
self.serialization.push('/'); | ||
} | ||
} | ||
debug_assert!(self.byte_at(self.scheme_end) == b':'); | ||
debug_assert!(self.byte_at(self.path_start) == b'/'); | ||
|
@@ -1935,14 +1968,28 @@ impl Url { | |
/// | ||
/// # fn run() -> Result<(), ParseError> { | ||
/// let mut url = Url::parse("https://example.net")?; | ||
/// let result = url.set_scheme("foo"); | ||
/// assert_eq!(url.as_str(), "foo://example.net/"); | ||
/// let result = url.set_scheme("http"); | ||
/// assert_eq!(url.as_str(), "http://example.net/"); | ||
/// assert!(result.is_ok()); | ||
/// # Ok(()) | ||
/// # } | ||
/// # run().unwrap(); | ||
/// ``` | ||
/// Change the URL’s scheme from `foo` to `bar`: | ||
/// | ||
/// ``` | ||
/// use url::Url; | ||
/// # use url::ParseError; | ||
/// | ||
/// # fn run() -> Result<(), ParseError> { | ||
/// let mut url = Url::parse("foo://example.net")?; | ||
/// let result = url.set_scheme("bar"); | ||
/// assert_eq!(url.as_str(), "bar://example.net"); | ||
/// assert!(result.is_ok()); | ||
/// # Ok(()) | ||
/// # } | ||
/// # run().unwrap(); | ||
/// ``` | ||
/// | ||
/// Cannot change URL’s scheme from `https` to `foõ`: | ||
/// | ||
|
@@ -1975,14 +2022,55 @@ impl Url { | |
/// # } | ||
/// # run().unwrap(); | ||
/// ``` | ||
/// Cannot change the URL’s scheme from `foo` to `https`: | ||
/// | ||
/// ``` | ||
/// use url::Url; | ||
/// # use url::ParseError; | ||
/// | ||
/// # fn run() -> Result<(), ParseError> { | ||
/// let mut url = Url::parse("foo://example.net")?; | ||
/// let result = url.set_scheme("https"); | ||
/// assert_eq!(url.as_str(), "foo://example.net"); | ||
/// assert!(result.is_err()); | ||
/// # Ok(()) | ||
/// # } | ||
/// # run().unwrap(); | ||
/// ``` | ||
/// Cannot change the URL’s scheme from `http` to `foo`: | ||
/// | ||
/// ``` | ||
/// use url::Url; | ||
/// # use url::ParseError; | ||
/// | ||
/// # fn run() -> Result<(), ParseError> { | ||
/// let mut url = Url::parse("http://example.net")?; | ||
/// let result = url.set_scheme("foo"); | ||
/// assert_eq!(url.as_str(), "http://example.net/"); | ||
/// assert!(result.is_err()); | ||
/// # Ok(()) | ||
/// # } | ||
/// # run().unwrap(); | ||
/// ``` | ||
pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { | ||
let mut parser = Parser::for_setter(String::new()); | ||
let remaining = parser.parse_scheme(parser::Input::new(scheme))?; | ||
if !remaining.is_empty() | ||
|| (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) | ||
let new_scheme_type = SchemeType::from(&parser.serialization); | ||
let old_scheme_type = SchemeType::from(self.scheme()); | ||
// If url’s scheme is a special scheme and buffer is not a special scheme, then return. | ||
if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || | ||
// If url’s scheme is not a special scheme and buffer is a special scheme, then return. | ||
(!new_scheme_type.is_special() && old_scheme_type.is_special()) || | ||
// If url includes credentials or has a non-null port, and buffer is "file", then return. | ||
// If url’s scheme is "file" and its host is an empty host or null, then return. | ||
(new_scheme_type.is_file() && self.has_authority()) | ||
{ | ||
return Err(()); | ||
} | ||
|
||
if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { | ||
return Err(()); | ||
} | ||
let old_scheme_end = self.scheme_end; | ||
let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); | ||
let adjust = |index: &mut u32| { | ||
|
@@ -2004,6 +2092,14 @@ impl Url { | |
|
||
parser.serialization.push_str(self.slice(old_scheme_end..)); | ||
self.serialization = parser.serialization; | ||
|
||
// Re-apply the current port so that it is removed | ||
// if it is the scheme's default. | ||
// We don't mind this silently failing | ||
// if there was no port in the first place. | ||
let previous_port = self.port(); | ||
let _ = self.set_port(previous_port); | ||
|
||
Ok(()) | ||
} | ||
|
||
|
@@ -2408,6 +2504,7 @@ fn path_to_file_url_segments_windows( | |
} | ||
let mut components = path.components(); | ||
|
||
let host_start = serialization.len() + 1; | ||
let host_end; | ||
let host_internal; | ||
match components.next() { | ||
|
@@ -2434,15 +2531,24 @@ fn path_to_file_url_segments_windows( | |
_ => return Err(()), | ||
} | ||
|
||
let mut path_only_has_prefix = true; | ||
for component in components { | ||
if component == Component::RootDir { | ||
continue; | ||
} | ||
path_only_has_prefix = false; | ||
// FIXME: somehow work with non-unicode? | ||
let component = component.as_os_str().to_str().ok_or(())?; | ||
serialization.push('/'); | ||
serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); | ||
} | ||
// A windows drive letter must end with a slash. | ||
if serialization.len() > host_start | ||
&& parser::is_windows_drive_letter(&serialization[host_start..]) | ||
&& path_only_has_prefix | ||
{ | ||
serialization.push('/'); | ||
} | ||
Ok((host_end, host_internal)) | ||
} | ||
|
||
|
@@ -2467,6 +2573,14 @@ fn file_url_segments_to_pathbuf( | |
bytes.push(b'/'); | ||
bytes.extend(percent_decode(segment.as_bytes())); | ||
} | ||
// A windows drive letter must end with a slash. | ||
if bytes.len() > 2 { | ||
if matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') | ||
&& matches!(bytes[bytes.len() - 1], b':' | b'|') | ||
{ | ||
bytes.push(b'/'); | ||
} | ||
} | ||
let os_str = OsStr::from_bytes(&bytes); | ||
let path = PathBuf::from(os_str); | ||
debug_assert!( | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.