class val URL
"""
Holds the components of a URL. These are always stored as valid, URL-encoded
values.
"""
var scheme: String = ""
"""
URL scheme.
If the given URL does not provide a scheme, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.1).
"""
var user: String = ""
"""
URL user as part of the URL's authority component:
```
authority = [ user [ ":" password ] "@" ] host [ ":" port ]
```
If the URL does not provide user information, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.2.1).
"""
var password: String = ""
"""
URL password as part of the URL's authority component:
```
authority = [ user [ ":" password ] "@" ] host [ ":" port ]
```
If the URL does not provide a password, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.2.1).
"""
var host: String = ""
"""
URL host as part of the URL's authority component:
```
authority = [ user [ ":" password ] "@" ] host [ ":" port ]
```
If the URL does not provide a host, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.2.2).
"""
var port: U16 = 0
"""
URL port as part of the URL's authority component:
```
authority = [ user [ ":" password ] "@" ] host [ ":" port ]
```
If the URL does not provide a port, this will be the default port for the
scheme (see `default_port`), which is 0 for unrecognised schemes.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.2.3).
"""
var path: String = ""
"""
URL path component.
If the URL does not provide a path component, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.3).
"""
var query: String = ""
"""
URL query component.
If the URL does not provide a query component, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.4).
"""
var fragment: String = ""
"""
URL fragment identifier component.
If the URL does not provide a fragment identifier component, this will be the empty string.
See also [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3.5).
"""
new val create() =>
"""
Create an empty URL.
Every component keeps its field default: the string components are empty and
the port is 0.
"""
None
new val build(from: String, percent_encoded: Bool = true) ? =>
"""
Parse the URL string into its components. If it isn't URL encoded, encode
it. If existing URL encoding is invalid, raise an error.

`from` is the URL text to parse. `percent_encoded` is forwarded unchanged to
every `URLEncode.encode` call; presumably it states whether `from` may
already contain percent-escapes (TODO confirm against `URLEncode.encode`).

Raises an error if parsing fails (e.g. an invalid port number), if the scheme
is rejected by `URLEncode.check_scheme`, or if any `URLEncode.encode` call
raises.
"""
// Split the raw text into components first; encoding is done per component.
_parse(from)?
// The scheme is only validated here, it is never re-encoded.
if not URLEncode.check_scheme(scheme) then error end
// Replace each remaining textual component with its encoded form. The port is
// numeric and is left exactly as parsed.
user = URLEncode.encode(user, URLPartUser, percent_encoded)?
password = URLEncode.encode(password, URLPartPassword, percent_encoded)?
host = URLEncode.encode(host, URLPartHost, percent_encoded)?
path = URLEncode.encode(path, URLPartPath, percent_encoded)?
query = URLEncode.encode(query, URLPartQuery, percent_encoded)?
fragment = URLEncode.encode(fragment, URLPartFragment, percent_encoded)?
new val valid(from: String) ? =>
"""
Parse the URL string into its components. If it isn't URL encoded, raise an
error.

Unlike `build`, nothing is encoded here: the parsed components are kept as
given and only checked with `is_valid`. An error is also raised if parsing
itself fails (e.g. an invalid port number).
"""
_parse(from)?
// Reject the URL outright rather than trying to repair it.
if not is_valid() then
error
end
fun is_valid(): Bool =>
  """
  Report whether every component of this URL is correctly URL encoded.

  The scheme is checked with `URLEncode.check_scheme`; user, password, host,
  path, query and fragment are each checked with `URLEncode.check` against
  their own part kind. Checking stops at the first component that fails.
  """
  if not URLEncode.check_scheme(scheme) then return false end
  if not URLEncode.check(user, URLPartUser) then return false end
  if not URLEncode.check(password, URLPartPassword) then return false end
  if not URLEncode.check(host, URLPartHost) then return false end
  if not URLEncode.check(path, URLPartPath) then return false end
  if not URLEncode.check(query, URLPartQuery) then return false end
  // Everything before the fragment passed, so the fragment decides.
  URLEncode.check(fragment, URLPartFragment)
fun string(): String iso^ =>
  """
  Assemble the components back into a single URL string.

  Empty components are skipped together with their delimiter. The "//"
  authority marker is written when there is a user or a host. The password is
  only written when there is a user, and the port is only written when there
  is a host and the port differs from the scheme's default port.
  """
  // Upper bound for the result: every component plus room for delimiters and
  // a printed port number, so the buffer never has to grow.
  let capacity =
    scheme.size() + 3 + user.size() + 1 + password.size() + 1 + host.size()
      + 6 + path.size() + 1 + query.size() + 1 + fragment.size()
  let out = recover String(capacity) end

  let has_user = user.size() > 0
  let has_host = host.size() > 0

  if scheme.size() > 0 then
    out.append(scheme)
    out.push(':')
  end

  if has_user or has_host then out.append("//") end

  if has_user then
    out.append(user)
    if password.size() > 0 then
      out.push(':')
      out.append(password)
    end
    out.push('@')
  end

  if has_host then
    out.append(host)
    // The scheme's default port is implied, so it is left out.
    if port != default_port() then
      out.push(':')
      out.append(port.string())
    end
  end

  // The path is written unconditionally; it carries its own leading '/'.
  out.append(path)

  if query.size() > 0 then
    out.push('?')
    out.append(query)
  end

  if fragment.size() > 0 then
    out.push('#')
    out.append(fragment)
  end

  consume out
fun val join(that: URL): URL =>
"""
Using this as a base URL, concatenate with another, possibly relative, URL
in the same way a browser does for a link.

NOTE: this is not implemented yet. `that` is currently ignored and the base
URL (`this`) is returned unchanged.
"""
// TODO: implement the join; until then the base URL is returned as-is.
this
fun default_port(): U16 =>
  """
  Give the well-known port for this URL's scheme: 80 for "http" and 443 for
  "https". Any other scheme, including the empty one, yields 0.
  """
  if scheme == "http" then
    U16(80)
  elseif scheme == "https" then
    U16(443)
  else
    U16(0)
  end
fun ref _parse(from: String) ? =>
  """
  Parse the given string as a URL and store the result in the component
  fields.

  The input is consumed left to right as scheme, authority, path, query and
  fragment. The authority is then broken down into user, password, host and
  port. An empty path is stored as "/". When the URL gives no port, the
  default port for the scheme is stored (0 for unrecognised schemes).

  Raises an error if the port is not a decimal number that fits in a U16.
  """
  (var offset, scheme) = _parse_scheme(from)
  (offset, let authority) = _parse_part(from, "//", "/?#", offset)
  (offset, path) = _parse_part(from, "", "?#", offset)
  (offset, query) = _parse_part(from, "?", "#", offset)
  (offset, fragment) = _parse_part(from, "#", "", offset)

  if path.size() == 0 then
    // An empty path is a root path.
    path = "/"
  end

  (var userinfo, var hostport) = _split(authority, '@')

  if not authority.contains("@") then
    // Without an '@' the whole authority is the host and port, but _split
    // put it in the first slot. Assignment yields the previous value, so
    // this moves the text into hostport and clears userinfo in one step.
    // The test looks for the '@' itself rather than an empty hostport, so
    // that an authority such as "user@" keeps its userinfo and gets an empty
    // host instead of having the user name mistaken for the host.
    hostport = userinfo = ""
  end

  (user, password) = _split(userinfo, ':')
  (host, let port_str) = _parse_hostport(hostport)

  port =
    if port_str.size() > 0 then
      // A URL port consists of decimal digits only, so force base 10 rather
      // than letting a prefix such as "0x" select another base.
      port_str.u16(10)?
    else
      default_port()
    end
fun _parse_scheme(from: String): (/*offset*/ISize, /*scheme*/String) =>
  """
  Look for a scheme at the start of the given string.

  A scheme is present only when a ':' appears before any '/', '?' or '#'.
  When found, the result is the offset just past the ':' together with the
  text before it. Otherwise the result is offset 0 and an empty scheme.
  """
  var pos: USize = 0

  for ch in from.values() do
    if ch == ':' then
      // Everything before the ':' is the scheme.
      return ((pos + 1).isize(), from.substring(0, pos.isize()))
    elseif (ch == '/') or (ch == '?') or (ch == '#') then
      // Another component started first, so there is no scheme.
      return (0, "")
    end
    pos = pos + 1
  end

  // Ran off the end without meeting ':' or any other delimiter.
  (0, "")
fun _parse_part(
  from: String,
  prefix: String,
  terminators: String,
  offset: ISize)
  : (/*offset*/ISize, /*part*/String)
=>
  """
  Try to read one part of the URL out of `from`, starting at `offset`.

  If `prefix` is non-empty it must be present at `offset`, otherwise nothing
  is consumed and the result is the unchanged offset with an empty part. Pass
  "" when the part has no prefix. The part runs from just after the prefix up
  to the first character that appears in `terminators`, or to the end of the
  input. The returned offset is that of the terminator when one is found, and
  the size of the input otherwise.
  """
  let needs_prefix = prefix.size() > 0
  if needs_prefix and (not from.at(prefix, offset)) then
    // The part is absent.
    return (offset, "")
  end

  let begin = offset + prefix.size().isize()

  try
    var pos = begin.usize()
    let limit = from.size()
    while pos < limit do
      let ch = from(pos)?
      for stop in terminators.values() do
        if stop == ch then
          // The part ends just before this terminator.
          return (pos.isize(), from.substring(begin, pos.isize()))
        end
      end
      pos = pos + 1
    end
  end

  // No terminator met: the part is the rest of the input.
  (from.size().isize(), from.substring(begin))
fun _split(src: String, separator: U8): (String, String) =>
  """
  Cut the given string in two at the first occurrence of `separator`. The
  separator itself belongs to neither half. When the separator does not occur,
  the first half is the whole of `src` and the second half is empty.
  """
  var pos: USize = 0

  for byte in src.values() do
    if byte == separator then
      let head = src.substring(0, pos.isize())
      let tail = src.substring((pos + 1).isize())
      return (consume head, consume tail)
    end
    pos = pos + 1
  end

  // The separator never appeared.
  (src, "")
fun _parse_hostport(hostport: String): (/*host*/String, /*port*/String) =>
  """
  Separate a "host and port" string into its host and its port.

  A host that does not start with '[' is cut at the first ':'. A host that
  starts with '[' (IPv6 format) may itself contain ':' characters, so the
  port separator is the first ':' that follows the closing ']'. The brackets
  stay part of the host. When there is no port separator, the port is empty.
  """
  if not hostport.at("[") then
    // Not an IPv6 format host (or empty): the first ':' is the separator.
    return _split(hostport, ':')
  end

  var past_bracket = false
  var pos: USize = 0

  for ch in hostport.values() do
    if past_bracket then
      if ch == ':' then
        // First ':' after the ']' separates the host from the port.
        return (hostport.substring(0, pos.isize()),
          hostport.substring((pos + 1).isize()))
      end
    elseif ch == ']' then
      // End of the bracketed address; from here on a ':' is the separator.
      past_bracket = true
    end
    pos = pos + 1
  end

  // No ':' after the bracketed address, so there is no port.
  (hostport, "")