Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 13 additions & 41 deletions src/gleam/uri.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ fn parse_host(uri_string: String, pieces: Uri) -> Result(Uri, Nil) {
// - [^:]
case uri_string {
// If we find an opening bracket we know it's the first format.
"[" <> _ -> parse_host_within_brackets(uri_string, pieces)
"[" <> rest -> parse_host_within_brackets(rest, pieces)

// A `:` marks the beginning of the port part of the authority string.
":" <> _ -> {
Expand Down Expand Up @@ -233,66 +233,38 @@ fn parse_host_within_brackets_loop(
size: Int,
) -> Result(Uri, Nil) {
case uri_string {
// If the string is over the entire string we were iterating through is the
// host part.
"" -> Ok(Uri(..pieces, host: Some(uri_string)))
// We reached the end without finding a closing `]`.
"" -> Error(Nil)

// A `]` marks the end of the host and the start of the port part.
"]" <> rest if size == 0 -> parse_port(rest, pieces)
"]" <> _ if size == 0 -> Error(Nil)
"]" <> rest -> {
let host = codeunit_slice(original, at_index: 0, length: size + 1)
let host = "[" <> codeunit_slice(original, at_index: 0, length: size + 1)
let pieces = Uri(..pieces, host: Some(host))
parse_port(rest, pieces)
}

// `/` marks the beginning of a path.
"/" <> _ if size == 0 -> parse_path(uri_string, pieces)
"/" <> _ -> {
let host = codeunit_slice(original, at_index: 0, length: size)
let pieces = Uri(..pieces, host: Some(host))
parse_path(uri_string, pieces)
}

// `?` marks the beginning of the query with question mark.
"?" <> rest if size == 0 -> parse_query_with_question_mark(rest, pieces)
"?" <> rest -> {
let host = codeunit_slice(original, at_index: 0, length: size)
let pieces = Uri(..pieces, host: Some(host))
parse_query_with_question_mark(rest, pieces)
}

// `#` marks the beginning of the fragment part.
"#" <> rest if size == 0 -> parse_fragment(rest, pieces)
"#" <> rest -> {
let host = codeunit_slice(original, at_index: 0, length: size)
let pieces = Uri(..pieces, host: Some(host))
parse_fragment(rest, pieces)
}

// In all other cases we just keep iterating.
_ -> {
let #(char, rest) = pop_codeunit(uri_string)
// Inside `[...]` there can only be some characters, if we find a special
// one then we know that we're actually parsing the other format for the
// host and we switch to that!
// Inside `[...]` there can only be some characters.
case is_valid_host_within_brackets_char(char) {
True ->
parse_host_within_brackets_loop(original, rest, pieces, size + 1)

False ->
parse_host_outside_of_brackets_loop(original, original, pieces, 0)
False -> Error(Nil)
}
}
}
}

fn is_valid_host_within_brackets_char(char: Int) -> Bool {
// Valid IPv6 hosts are only [0-9A-Fa-f:.].
// [0-9]
{ 48 >= char && char <= 57 }
// [A-Z]
|| { 65 >= char && char <= 90 }
// [a-z]
|| { 97 >= char && char <= 122 }
{ 48 <= char && char <= 57 }
// [A-F]
|| { 65 <= char && char <= 70 }
// [a-f]
|| { 97 <= char && char <= 102 }
// :
|| char == 58
// .
Expand Down
19 changes: 18 additions & 1 deletion src/gleam_stdlib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ uri_parse(String) ->
{ok, {uri,
maps_get_optional_lowercase(Uri, scheme),
maps_get_optional(Uri, userinfo),
maps_get_optional(Uri, host),
maps_get_optional_host(Uri),
Port,
maps_get_or(Uri, path, <<>>),
maps_get_optional(Uri, query),
Expand All @@ -251,6 +251,23 @@ maps_get_optional(Map, Key) ->
catch _:_ -> none
end.

%% Look up the `host` key of a parsed-URI map and return it as an option:
%% `{some, Host}` when the key is present (with the host passed through
%% bracket_ipv6_host/1 first), `none` when the lookup or the normalisation
%% raises for any reason.
maps_get_optional_host(Map) ->
    try
        Host = maps:get(host, Map),
        {some, bracket_ipv6_host(Host)}
    catch
        _Class:_Reason -> none
    end.

%% Ensure a host that contains a colon is wrapped in square brackets.
%%
%% - A binary that already starts with `[` is returned untouched.
%% - A binary without any `:` is returned untouched.
%% - Any other binary containing `:` is returned as `[Host]`.
%% - Non-binary values are passed through unchanged.
bracket_ipv6_host(<<"[", _/binary>> = Host) ->
    Host;
bracket_ipv6_host(Host) when is_binary(Host) ->
    case binary:match(Host, <<":">>) of
        nomatch -> Host;
        _Found -> <<"[", Host/binary, "]">>
    end;
bracket_ipv6_host(Host) ->
    Host.

maps_get_or(Map, Key, Default) ->
try maps:get(Key, Map)
catch _:_ -> Default
Expand Down
98 changes: 98 additions & 0 deletions test/gleam/uri_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,104 @@ pub fn parse_empty_query_3_test() {
let assert Some("") = assert_parse("http://example.com/test?").query
}

pub fn parse_ipv6_host_preserves_brackets_test() {
  // A full IPv6 literal host must come back with its square brackets intact.
  let input = "http://[2600:1406:bc00:53::b81e:94c8]/wobble"
  let assert Ok(uri.Uri(host: host, ..)) = uri.parse(input)
  assert host == Some("[2600:1406:bc00:53::b81e:94c8]")
}

pub fn parse_ipv6_host_with_port_preserves_brackets_test() {
  // The bracketed host and the port that follows it are both extracted.
  let input = "http://[2600:1406:bc00:53::b81e:94c8]:8080/wobble"
  let assert Ok(uri.Uri(host: host, port: port, ..)) = uri.parse(input)
  assert host == Some("[2600:1406:bc00:53::b81e:94c8]")
  assert port == Some(8080)
}

pub fn parse_ipv6_host_roundtrip_to_string_test() {
  // Serialising the parsed URI keeps the brackets; the expected output ends
  // with a `/` even though the input has no path.
  let assert Ok(parsed) =
    "http://[2600:1406:bc00:53::b81e:94c8]"
    |> uri.parse
  let rendered = uri.to_string(parsed)
  assert rendered == "http://[2600:1406:bc00:53::b81e:94c8]/"
}

pub fn parse_ipv6_compact_loopback_preserves_brackets_test() {
  // The compact loopback form `[::1]` is kept verbatim as the host and the
  // path after it is still recognised.
  let assert Ok(uri.Uri(host: host, path: path, ..)) =
    uri.parse("http://[::1]/wobble")
  assert host == Some("[::1]")
  assert path == "/wobble"
}

pub fn parse_ipv6_compact_loopback_with_port_test() {
  // `[::1]` followed by `:443` yields the bracketed host plus port 443.
  let assert Ok(uri.Uri(host: host, port: port, ..)) =
    uri.parse("http://[::1]:443/wobble")
  assert host == Some("[::1]")
  assert port == Some(443)
}

pub fn parse_ipv6_collapsed_middle_preserves_brackets_test() {
  // An address with `::` in the middle keeps its brackets when parsed.
  let input = "http://[2001:db8::2:1]/wobble"
  let assert Ok(uri.Uri(host: host, ..)) = uri.parse(input)
  assert host == Some("[2001:db8::2:1]")
}

pub fn parse_ipv6_collapsed_roundtrip_to_string_test() {
  // Parse then serialise: brackets survive and the output ends with `/`.
  let assert Ok(parsed) =
    "http://[2001:db8::2:1]"
    |> uri.parse
  let rendered = uri.to_string(parsed)
  assert rendered == "http://[2001:db8::2:1]/"
}

pub fn parse_ipv6_host_with_path_query_fragment_test() {
  // Every component of a URI with a bracketed IPv6 host is checked: scheme,
  // host, path, query and fragment are populated; port and userinfo are not.
  let input = "http://[2001:db8::2:1]/foo/bar?baz=bif#blah"
  let assert Ok(uri.Uri(
    scheme: scheme,
    userinfo: userinfo,
    host: host,
    port: port,
    path: path,
    query: query,
    fragment: fragment,
    ..,
  )) = uri.parse(input)
  assert scheme == Some("http")
  assert host == Some("[2001:db8::2:1]")
  assert path == "/foo/bar"
  assert query == Some("baz=bif")
  assert fragment == Some("blah")
  assert port == None
  assert userinfo == None
}

pub fn parse_malformed_many_opening_brackets_in_host_test() {
  // A host made of repeated `[` characters must be rejected.
  let outcome = uri.parse("http://[[[[[[[[[[]/")
  assert outcome == Error(Nil)
}

pub fn parse_malformed_nested_opening_bracket_in_host_test() {
  // A second `[` directly after the opening bracket must be rejected.
  let outcome = uri.parse("http://[[::1]/")
  assert outcome == Error(Nil)
}

pub fn parse_malformed_unclosed_bracket_host_test() {
  // The host is opened with `[` but is followed by another `[` instead of a
  // closing `]`, so parsing must fail.
  let outcome = uri.parse("http://[::1[/")
  assert outcome == Error(Nil)
}

pub fn parse_malformed_question_mark_within_bracket_host_test() {
  // A `?` between the brackets must be rejected.
  let outcome = uri.parse("http://[::1?bad]/")
  assert outcome == Error(Nil)
}

pub fn parse_malformed_slash_within_bracket_host_test() {
  // A `/` between the brackets must be rejected.
  let outcome = uri.parse("http://[::1/bad]/")
  assert outcome == Error(Nil)
}

pub fn ipv6_uppercase_test() {
  // Upper-case hex digits A-F are accepted and their case is preserved, both
  // in the parsed host and when the URI is turned back into a string.
  let input = "http://[2001:DB8::1]"
  let assert Ok(parsed) = uri.parse(input)
  let uri.Uri(host: host, ..) = parsed
  assert host == Some("[2001:DB8::1]")
  let rendered = uri.to_string(parsed)
  assert rendered == "http://[2001:DB8::1]/"
}

pub fn ipv6_mixedcase_test() {
  // Mixed-case hex digits are accepted and returned exactly as written.
  let input = "http://[2001:dB8:ABcd::]"
  let assert Ok(uri.Uri(host: host, ..)) = uri.parse(input)
  assert host == Some("[2001:dB8:ABcd::]")
}

pub fn parse_ipv6_with_invalid_char_test() {
  // `g` / `G` fall just outside the hex-digit range, so both spellings must
  // be rejected inside a bracketed host.
  let lowercase_outcome = uri.parse("http://[::g]/")
  assert lowercase_outcome == Error(Nil)
  let uppercase_outcome = uri.parse("http://[::G]/")
  assert uppercase_outcome == Error(Nil)
}

pub fn parse_bracket_followed_by_text_error_test() {
  // Plain text placed directly after the closing `]` must make parsing fail.
  let outcome = uri.parse("http://[::1]foo")
  assert outcome == Error(Nil)
}

pub fn parse_double_closing_bracket_test() {
  // A second `]` right after the host's closing bracket must be rejected.
  let outcome = uri.parse("http://[::1]]/")
  assert outcome == Error(Nil)
}

pub fn full_uri_to_string_test() {
let test_uri =
uri.Uri(
Expand Down