// Marker primitives, one per URL component. They are passed as the `part`
// argument of the URLEncode functions to select which characters are legal
// in that component.
primitive URLPartUser
primitive URLPartPassword
primitive URLPartHost
primitive URLPartPath
primitive URLPartQuery
primitive URLPartFragment
// Union of all the URL component markers declared above.
type URLPart is
( URLPartUser
| URLPartPassword
| URLPartHost
| URLPartPath
| URLPartQuery
| URLPartFragment
)
primitive URLEncode
"""
Functions for checking, encoding, and decoding parts of URLs.
"""
fun encode(from: String, part: URLPart, percent_encoded: Bool = true)
  : String ?
=>
  """
  URL encode and normalise the given string for use as the given URL part.

  The percent_encoded parameter indicates how '%' characters should be
  interpreted:
    true  => the string is already at least partially encoded, so each '%'
             introduces an encoded character.
    false => the string is not yet encoded at all, so each '%' is just a '%'.

  A valid IPv6 format host is returned unchanged.

  An error is raised on invalid existing encoding (a '%' that is not followed
  by two hex digits) and on an invalid IPv6 format host.
  """
  if _is_host_ipv6(from, part)? then
    return from
  end

  let length = from.size()
  let encoded = recover String(length) end
  var pos = USize(0)

  while pos < length do
    let raw = from(pos)?
    let is_escape = (raw == '%') and percent_encoded

    // For an escape sequence, work with the byte it stands for.
    // _unhex() and the indexing raise on a bad / missing hex digit.
    let byte =
      if is_escape then
        (_unhex(from(pos + 1)?)? << 4) or _unhex(from(pos + 2)?)?
      else
        raw
      end

    // An already-escaped byte is emitted plainly only when normal form
    // requires decoding it; a raw byte only when it is legal in this part.
    let keep_plain =
      if is_escape then
        _normal_decode(byte, part)
      else
        _is_char_legal(byte, part)
      end

    if keep_plain then
      encoded.push(byte)
    else
      encoded.push('%')
      encoded.push(_hex(byte >> 4)?)
      encoded.push(_hex(byte and 0xF)?)
    end

    // An escape sequence consumes three input bytes, anything else one.
    let step: USize = if is_escape then 3 else 1 end
    pos = pos + step
  end

  encoded
fun decode(from: String): String ? =>
  """
  URL decode a string.

  Each "%XX" sequence is replaced by the byte with hex value XX, each '+' is
  replaced by a space, and every other byte is copied through unchanged.

  Raises an error if a '%' is not followed by two hex digits.
  """
  let length = from.size()
  let decoded = recover String(length) end
  var pos = USize(0)

  while pos < length do
    let byte = from(pos)?

    match byte
    | '%' =>
      // _unhex() and the indexing raise on a bad / missing hex digit.
      let high = _unhex(from(pos + 1)?)?
      let low = _unhex(from(pos + 2)?)?
      decoded.push((high << 4) or low)
      pos = pos + 3
    | '+' =>
      decoded.push(' ')
      pos = pos + 1
    else
      decoded.push(byte)
      pos = pos + 1
    end
  end

  decoded
fun check_scheme(scheme: String): Bool =>
  """
  Check that the given string is a valid scheme.

  Only the character set is checked: every byte must be an ASCII letter, a
  digit, '-', '+' or '.'. A string with no bytes at all passes the check.
  """
  for c in scheme.values() do
    let allowed =
      ((c >= 'a') and (c <= 'z'))
        or ((c >= 'A') and (c <= 'Z'))
        or ((c >= '0') and (c <= '9'))
        or (c == '-')
        or (c == '+')
        or (c == '.')

    if not allowed then
      return false
    end
  end

  true
fun check(from: String, part: URLPart): Bool =>
  """
  Check that the given string is valid to be the given URL part without
  further encoding. Only validity is checked, not whether the string is in
  canonical form.

  Returns false for an invalid IPv6 format host, for a '%' that is not
  followed by two hex digits, and for any character that is not legal in the
  given part.
  """
  try
    // A well-formed IPv6 host needs no further inspection. A malformed one
    // raises and is rejected by the else branch below.
    if _is_host_ipv6(from, part)? then
      return true
    end

    let length = from.size()
    var pos = USize(0)

    while pos < length do
      let c = from(pos)?

      if c != '%' then
        // Plain character: it must be legal for this part as it stands.
        if not _is_char_legal(c, part) then
          return false
        end

        pos = pos + 1
      else
        // Encoded character: both following bytes must be hex digits.
        // _unhex() and the indexing raise on a bad / missing hex digit.
        _unhex(from(pos + 1)?)?
        _unhex(from(pos + 2)?)?
        pos = pos + 3
      end
    end

    true
  else
    false
  end
fun _is_char_legal(value: U8, part: URLPart): Bool =>
  """
  Determine whether the given character may appear, unencoded, in the
  specified URL part.
  """
  // The unreserved and sub-delim characters are allowed in every part.
  let alphanumeric =
    ((value >= 'a') and (value <= 'z'))
      or ((value >= 'A') and (value <= 'Z'))
      or ((value >= '0') and (value <= '9'))

  let unreserved_mark =
    (value == '-') or (value == '.') or (value == '_') or (value == '~')

  let sub_delim =
    (value == '!') or (value == '$') or (value == '&') or (value == '\'')
      or (value == '(') or (value == ')') or (value == '*') or (value == '+')
      or (value == ',') or (value == ';') or (value == '=')

  if alphanumeric or unreserved_mark or sub_delim then
    return true
  end

  // Which general delims are allowed depends on the part. Parts not listed
  // in the match allow none of them.
  let colon = (value == ':')
  let at_or_slash = (value == '@') or (value == '/')
  let question = (value == '?')

  match part
  | URLPartPassword => colon
  | URLPartPath => colon or at_or_slash
  | URLPartQuery => colon or at_or_slash or question
  | URLPartFragment => colon or at_or_slash or question
  else
    false
  end
fun _normal_decode(value: U8, part: URLPart): Bool =>
  """
  Determine whether the given character, found in encoded form, should be
  decoded to give normal form.

  Some characters, such as sub-delims, are valid in either encoded or
  unencoded form and are left as they are when normalising. False is returned
  for such characters.
  """
  // The unreserved characters should always be decoded.
  let alphanumeric =
    ((value >= 'a') and (value <= 'z'))
      or ((value >= 'A') and (value <= 'Z'))
      or ((value >= '0') and (value <= '9'))

  let unreserved_mark =
    (value == '-') or (value == '_') or (value == '.') or (value == '~')

  if alphanumeric or unreserved_mark then
    return true
  end

  // Which general delims to decode depends on the part. Parts not listed in
  // the match decode none of them.
  let colon = (value == ':')
  let at_or_slash = (value == '@') or (value == '/')
  let question = (value == '?')

  match part
  | URLPartPassword => colon
  | URLPartPath => colon or at_or_slash
  | URLPartQuery => colon or at_or_slash or question
  | URLPartFragment => colon or at_or_slash or question
  else
    false
  end
fun _is_host_ipv6(host: String, part: URLPart): Bool ? =>
  """
  Check whether the given string is a valid IPv6 format host.

  Returns:
    true if string is a valid IPv6 format host.
    false if string is not an IPv6 format host at all, i.e. the part is not
      the host, or the string is empty or does not start with a '['.

  Raises an error if string is an invalid IPv6 format host: it starts with a
  '[' but contains a character other than a hex digit, ':' or '.' between
  the first and last characters, or it does not end with a ']'.
  """
  // Anything that is not a host starting with a '[' is simply not IPv6.
  if part isnt URLPartHost then
    return false
  end

  if host.size() == 0 then
    return false
  end

  if host(0)? != '[' then
    return false
  end

  // We are an IPv6 format host. Inspect everything between the opening '['
  // and the final character.
  let last = host.size() - 1
  var pos = USize(1)

  while pos < last do
    let c = host(pos)?

    // Only hex digits, ':' and '.' are allowed.
    let permitted =
      ((c >= '0') and (c <= '9'))
        or ((c >= 'a') and (c <= 'f'))
        or ((c >= 'A') and (c <= 'F'))
        or (c == ':')
        or (c == '.')

    if not permitted then
      error
    end

    pos = pos + 1
  end

  // Must end with a ']'.
  if host(last)? != ']' then
    error
  end

  true
fun _hex(value: U8): U8 ? =>
  """
  Turn 4 bits into the matching upper case hex digit character.

  Raises an error if value is 16 or more.
  """
  // Indexing past the end of the table raises, which covers values that do
  // not fit in 4 bits.
  let digits = "0123456789ABCDEF"
  digits(value.usize())?
fun _unhex(value: U8): U8 ? =>
  """
  Turn a hex digit character into the 4 bits it stands for. Both upper and
  lower case letters are accepted.

  Raises an error if value is not a hex digit character.
  """
  match value
  | let digit: U8 if (digit >= '0') and (digit <= '9') =>
    digit - '0'
  | let upper: U8 if (upper >= 'A') and (upper <= 'F') =>
    (upper - 'A') + 10
  | let lower: U8 if (lower >= 'a') and (lower <= 'f') =>
    (lower - 'a') + 10
  else
    error
  end