ci: fix warnings/errors due to the vfmt change

This commit is contained in:
Delyan Angelov 2020-10-15 16:17:52 +03:00
parent 50a2b033b7
commit 31ef921ef2
33 changed files with 466 additions and 570 deletions

View file

@ -22,10 +22,10 @@ enum EncodingMode {
const (
err_msg_escape = 'unescape: invalid URL escape'
err_msg_parse = 'parse: failed parsing url'
err_msg_parse = 'parse: failed parsing url'
)
fn error_msg(message, val string) string {
fn error_msg(message string, val string) string {
mut msg := 'net.urllib.$message'
if val != '' {
msg = '$msg ($val)'
@ -53,7 +53,8 @@ fn should_escape(c byte, mode EncodingMode) bool {
// we could possibly allow, and parse will reject them if we
// escape them (because hosts can't use %-encoding for
// ASCII bytes).
if c in [`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`, `[`, `]`, `<`, `>`, `"`] {
if c in
[`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`, `[`, `]`, `<`, `>`, `"`] {
return false
}
}
@ -100,11 +101,11 @@ fn should_escape(c byte, mode EncodingMode) bool {
// everything, so escape nothing.
return false
}
else {
}}
else {}
}
}
else {
}}
else {}
}
if mode == .encode_fragment {
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
@ -113,11 +114,9 @@ fn should_escape(c byte, mode EncodingMode) bool {
// escape single quote to avoid breaking callers that had previously assumed that
// single quotes would be escaped. See issue #19917.
match c {
`!`, `(`, `)`, `*` {
return false
}
else {
}}
`!`, `(`, `)`, `*` { return false }
else {}
}
}
// Everything else must be escaped.
return true
@ -150,55 +149,58 @@ fn unescape(s_ string, mode EncodingMode) ?string {
// Count %, check that they're well-formed.
mut n := 0
mut has_plus := false
for i := 0; i < s.len; {
for i := 0; i < s.len; {
x := s[i]
match x {
`%` {
if s == '' {
break
}
n++
if i + 2 >= s.len || !ishex(s[i + 1]) || !ishex(s[i + 2]) {
s = s[i..]
if s.len > 3 {
s = s[..3]
`%` {
if s == '' {
break
}
return error(error_msg(err_msg_escape, s))
}
// Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used
// for non-ASCII bytes.
// But https://tools.ietf.org/html/rfc6874#section-2
// introduces %25 being allowed to escape a percent sign
// in IPv6 scoped-address literals. Yay.
if mode == .encode_host && unhex(s[i + 1]) < 8 && s[i..i + 3] != '%25' {
return error(error_msg(err_msg_escape, s[i..i + 3]))
}
if mode == .encode_zone {
// RFC 6874 says basically 'anything goes' for zone identifiers
// and that even non-ASCII can be redundantly escaped,
// but it seems prudent to restrict %-escaped bytes here to those
// that are valid host name bytes in their unescaped form.
// That is, you can use escaping in the zone identifier but not
// to introduce bytes you couldn't just write directly.
// But Windows puts spaces here! Yay.
v := ( (unhex(s[i + 1])<<byte(4)) | unhex(s[i + 2]))
if s[i..i + 3] != '%25' && v != ` ` && should_escape(v, .encode_host) {
error(error_msg(err_msg_escape, s[i..i + 3]))
n++
if i + 2 >= s.len || !ishex(s[i + 1]) || !ishex(s[i + 2]) {
s = s[i..]
if s.len > 3 {
s = s[..3]
}
return error(error_msg(err_msg_escape, s))
}
// Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used
// for non-ASCII bytes.
// But https://tools.ietf.org/html/rfc6874#section-2
// introduces %25 being allowed to escape a percent sign
// in IPv6 scoped-address literals. Yay.
if mode == .encode_host && unhex(s[i + 1]) < 8 && s[i..i + 3] != '%25' {
return error(error_msg(err_msg_escape, s[i..i + 3]))
}
if mode == .encode_zone {
// RFC 6874 says basically 'anything goes' for zone identifiers
// and that even non-ASCII can be redundantly escaped,
// but it seems prudent to restrict %-escaped bytes here to those
// that are valid host name bytes in their unescaped form.
// That is, you can use escaping in the zone identifier but not
// to introduce bytes you couldn't just write directly.
// But Windows puts spaces here! Yay.
v := ((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2]))
if s[i..i + 3] != '%25' && v != ` ` && should_escape(v, .encode_host) {
error(error_msg(err_msg_escape, s[i..i + 3]))
}
}
i += 3
}
i += 3
}
`+` {
has_plus = mode == .encode_query_component
i++
}
else {
if (mode == .encode_host || mode == .encode_zone) && s[i] < 0x80 && should_escape(s[i], mode) {
error(error_msg('unescape: invalid character in host name', s[i..i + 1]))
`+` {
has_plus = mode == .encode_query_component
i++
}
i++
}}
else {
if (mode == .encode_host ||
mode == .encode_zone) &&
s[i] < 0x80 && should_escape(s[i], mode) {
error(error_msg('unescape: invalid character in host name', s[i..i + 1]))
}
i++
}
}
}
if n == 0 && !has_plus {
return s
@ -208,20 +210,20 @@ fn unescape(s_ string, mode EncodingMode) ?string {
x := s[i]
match x {
`%` {
t.write( ((unhex(s[i + 1])<<byte(4)) | unhex(s[i + 2])).str() )
t.write(((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2])).str())
i += 2
}
`+` {
if mode == .encode_query_component {
t.write(' ')
}
else {
} else {
t.write('+')
}
}
else {
t.write(s[i].str())
}}
}
}
}
return t.str()
}
@ -242,13 +244,12 @@ fn escape(s string, mode EncodingMode) string {
mut space_count := 0
mut hex_count := 0
mut c := byte(0)
for i in 0..s.len {
for i in 0 .. s.len {
c = s[i]
if should_escape(c, mode) {
if c == ` ` && mode == .encode_query_component {
space_count++
}
else {
} else {
hex_count++
}
}
@ -256,18 +257,17 @@ fn escape(s string, mode EncodingMode) string {
if space_count == 0 && hex_count == 0 {
return s
}
buf := []byte{len:(64)}
buf := []byte{len: (64)}
mut t := []byte{}
required := s.len + 2 * hex_count
if required <= buf.len {
t = buf[..required]
}
else {
t = []byte{len:(required)}
} else {
t = []byte{len: (required)}
}
if hex_count == 0 {
copy(t, s.bytes())
for i in 0..s.len {
for i in 0 .. s.len {
if s[i] == ` ` {
t[i] = `+`
}
@ -276,19 +276,17 @@ fn escape(s string, mode EncodingMode) string {
}
upperhex := '0123456789ABCDEF'
mut j := 0
for i in 0..s.len {
for i in 0 .. s.len {
c1 := s[i]
if c1 == ` ` && mode == .encode_query_component {
t[j] = `+`
j++
}
else if should_escape(c1, mode) {
} else if should_escape(c1, mode) {
t[j] = `%`
t[j + 1] = upperhex[c1>>4]
t[j + 1] = upperhex[c1 >> 4]
t[j + 2] = upperhex[c1 & 15]
j += 3
}
else {
} else {
t[j] = s[i]
j++
}
@ -345,9 +343,8 @@ pub fn user(username string) &Userinfo {
// ``is NOT RECOMMENDED, because the passing of authentication
// information in clear text (such as URI) has proven to be a
// security risk in almost every case where it has been used.''
fn user_password(username, password string) &Userinfo {
return &Userinfo{
username,password,true}
fn user_password(username string, password string) &Userinfo {
return &Userinfo{username, password, true}
}
// The Userinfo type is an immutable encapsulation of username and
@ -382,23 +379,20 @@ fn (u &Userinfo) str() string {
// (scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
// If so, return [scheme, path]; else return ['', rawurl]
fn split_by_scheme(rawurl string) ?[]string {
for i in 0..rawurl.len {
for i in 0 .. rawurl.len {
c := rawurl[i]
if (`a` <= c && c <= `z`) || (`A` <= c && c <= `Z`) {
// do nothing
}
else if (`0` <= c && c <= `9`) || (c == `+` || c == `-` || c == `.`) {
} else if (`0` <= c && c <= `9`) || (c == `+` || c == `-` || c == `.`) {
if i == 0 {
return ['', rawurl]
}
}
else if c == `:` {
} else if c == `:` {
if i == 0 {
return error(error_msg('split_by_scheme: missing protocol scheme', ''))
}
return [rawurl[..i], rawurl[i + 1..]]
}
else {
} else {
// we have encountered an invalid character,
// so there is no valid scheme
return ['', rawurl]
@ -417,15 +411,15 @@ fn get_scheme(rawurl string) ?string {
// split slices s into two substrings separated by the first occurrence of
// sep. If cutc is true then sep is excluded from the second substring;
// otherwise sep is kept at the start of the second substring.
// If sep does not occur in s then s and the empty string are returned.
fn split(s string, sep byte, cutc bool) (string,string) {
fn split(s string, sep byte, cutc bool) (string, string) {
i := s.index_byte(sep)
if i < 0 {
return s,''
return s, ''
}
if cutc {
return s[..i],s[i + 1..]
return s[..i], s[i + 1..]
}
return s[..i],s[i..]
return s[..i], s[i..]
}
// parse parses rawurl into a URL structure.
@ -436,7 +430,7 @@ fn split(s string, sep byte, cutc bool) (string,string) {
// error, due to parsing ambiguities.
pub fn parse(rawurl string) ?URL {
// Cut off #frag
u,frag := split(rawurl, `#`, true)
u, frag := split(rawurl, `#`, true)
mut url := parse_url(u, false) or {
return error(error_msg(err_msg_parse, u))
}
@ -479,7 +473,7 @@ fn parse_url(rawurl string, via_request bool) ?URL {
}
// Split off possible leading 'http:', 'mailto:', etc.
// Cannot contain escaped characters.
p := split_by_scheme(rawurl)?
p := split_by_scheme(rawurl) ?
url.scheme = p[0]
mut rest := p[1]
url.scheme = url.scheme.to_lower()
@ -487,9 +481,8 @@ fn parse_url(rawurl string, via_request bool) ?URL {
if rest.ends_with('?') && !rest[..1].contains('?') {
url.force_query = true
rest = rest[..rest.len - 1]
}
else {
r,raw_query := split(rest, `?`, true)
} else {
r, raw_query := split(rest, `?`, true)
rest = r
url.raw_query = raw_query
}
@ -516,13 +509,14 @@ fn parse_url(rawurl string, via_request bool) ?URL {
}
if colon >= 0 && (slash < 0 || colon < slash) {
// First path segment has colon. Not allowed in relative URL.
return error(error_msg('parse_url: first path segment in URL cannot contain colon', ''))
return error(error_msg('parse_url: first path segment in URL cannot contain colon',
''))
}
}
if ((url.scheme != '' || !via_request) && !rest.starts_with('///')) && rest.starts_with('//') {
authority,r := split(rest[2..], `/`, false)
authority, r := split(rest[2..], `/`, false)
rest = r
a := parse_authority(authority)?
a := parse_authority(authority) ?
url.user = a.user
url.host = a.host
}
@ -530,7 +524,7 @@ fn parse_url(rawurl string, via_request bool) ?URL {
// raw_path is a hint of the encoding of path. We don't want to set it if
// the default escaping of path is equivalent, to help make sure that people
// don't rely on it in general.
url.set_path(rest)?
url.set_path(rest) ?
return url
}
@ -546,11 +540,10 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
mut host := ''
mut zuser := user('')
if i < 0 {
h := parse_host(authority)?
h := parse_host(authority) ?
host = h
}
else {
h := parse_host(authority[i + 1..])?
} else {
h := parse_host(authority[i + 1..]) ?
host = h
}
if i < 0 {
@ -564,15 +557,14 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
return error(error_msg('parse_authority: invalid userinfo', ''))
}
if !userinfo.contains(':') {
u := unescape(userinfo, .encode_user_password)?
u := unescape(userinfo, .encode_user_password) ?
userinfo = u
zuser = user(userinfo)
}
else {
mut username,mut password := split(userinfo, `:`, true)
u := unescape(username, .encode_user_password)?
} else {
mut username, mut password := split(userinfo, `:`, true)
u := unescape(username, .encode_user_password) ?
username = u
p := unescape(password, .encode_user_password)?
p := unescape(password, .encode_user_password) ?
password = p
zuser = user_password(username, password)
}
@ -593,7 +585,8 @@ fn parse_host(host string) ?string {
}
mut colon_port := host[i + 1..]
if !valid_optional_port(colon_port) {
return error(error_msg('parse_host: invalid port $colon_port after host ', ''))
return error(error_msg('parse_host: invalid port $colon_port after host ',
''))
}
// RFC 6874 defines that %25 (%-encoded percent) introduces
// the zone identifier, and the zone identifier can use basically
@ -601,7 +594,7 @@ fn parse_host(host string) ?string {
// can only %-encode non-ASCII bytes.
// We do impose some restrictions on the zone, to avoid stupidity
// like newlines.
if zone:=host[..i].index('%25'){
if zone := host[..i].index('%25') {
host1 := unescape(host[..zone], .encode_host) or {
return err
}
@ -613,10 +606,11 @@ fn parse_host(host string) ?string {
}
return host1 + host2 + host3
}
if idx:=host.last_index(':'){
if idx := host.last_index(':') {
colon_port = host[idx..]
if !valid_optional_port(colon_port) {
return error(error_msg('parse_host: invalid port $colon_port after host ', ''))
return error(error_msg('parse_host: invalid port $colon_port after host ',
''))
}
}
}
@ -627,6 +621,7 @@ fn parse_host(host string) ?string {
// host = h
// return host
}
// set_path sets the path and raw_path fields of the URL based on the provided
// escaped path p. It maintains the invariant that raw_path is only specified
// when it differs from the default encoding of the path.
@ -636,14 +631,13 @@ fn parse_host(host string) ?string {
// set_path will return an error only if the provided path contains an invalid
// escaping.
pub fn (mut u URL) set_path(p string) ?bool {
path := unescape(p, .encode_path)?
path := unescape(p, .encode_path) ?
u.path = path
escp := escape(path, .encode_path)
if p == escp {
// Default encoding is fine.
u.raw_path = ''
}
else {
} else {
u.raw_path = p
}
return true
@ -674,7 +668,7 @@ fn (u &URL) escaped_path() string {
// valid_encoded_path reports whether s is a valid encoded path.
// It must not contain any bytes that require escaping during path encoding.
fn valid_encoded_path(s string) bool {
for i in 0..s.len {
for i in 0 .. s.len {
// RFC 3986, Appendix A.
// pchar = unreserved / pct-encoded / sub-delims / ':' / '@'.
// should_escape is not quite compliant with the RFC,
@ -695,7 +689,8 @@ fn valid_encoded_path(s string) bool {
if should_escape(s[i], .encode_path) {
return false
}
}}
}
}
}
return true
}
@ -746,8 +741,7 @@ pub fn (u URL) str() string {
}
if u.opaque != '' {
buf.write(u.opaque)
}
else {
} else {
if u.scheme != '' || u.host != '' || (u.user != 0 && !u.user.empty()) {
if u.host != '' || u.path != '' || !u.user.empty() {
buf.write('//')
@ -804,7 +798,7 @@ pub fn (u URL) str() string {
// interpreted as a key set to an empty value.
pub fn parse_query(query string) ?Values {
mut m := new_values()
parse_query_values(mut m, query)?
parse_query_values(mut m, query) ?
return m
}
@ -825,15 +819,14 @@ fn parse_query_values(mut m Values, query string) ?bool {
if i >= 0 {
q = key[i + 1..]
key = key[..i]
}
else {
} else {
q = ''
}
if key == '' {
continue
}
mut value := ''
if idx:=key.index('='){
if idx := key.index('=') {
i = idx
value = key[i + 1..]
key = key[..i]
@ -885,18 +878,16 @@ pub fn (v Values) encode() string {
// resolve_path applies special path segments from refs and applies
// them to base, per RFC 3986.
fn resolve_path(base, ref string) string {
fn resolve_path(base string, ref string) string {
mut full := ''
if ref == '' {
full = base
}
else if ref[0] != `/` {
} else if ref[0] != `/` {
i := base.last_index('/') or {
-1
}
full = base[..i + 1] + ref
}
else {
} else {
full = ref
}
if full == '' {
@ -916,7 +907,8 @@ fn resolve_path(base, ref string) string {
}
else {
dst << elem
}}
}
}
}
last := src[src.len - 1]
if last == '.' || last == '..' {
@ -936,7 +928,7 @@ pub fn (u &URL) is_abs() bool {
// may be relative or absolute. parse returns an error on parse
// failure, otherwise its return value is the same as resolve_reference.
pub fn (u &URL) parse(ref string) ?URL {
refurl := parse(ref)?
refurl := parse(ref) ?
return u.resolve_reference(refurl)
}
@ -955,7 +947,7 @@ pub fn (u &URL) resolve_reference(ref &URL) ?URL {
// The 'absoluteURI' or 'net_path' cases.
// We can ignore the error from set_path since we know we provided a
// validly-escaped path.
url.set_path(resolve_path(ref.escaped_path(), ''))?
url.set_path(resolve_path(ref.escaped_path(), '')) ?
return url
}
if ref.opaque != '' {
@ -973,7 +965,7 @@ pub fn (u &URL) resolve_reference(ref &URL) ?URL {
// The 'abs_path' or 'rel_path' cases.
url.host = u.host
url.user = u.user
url.set_path(resolve_path(u.escaped_path(), ref.escaped_path()))?
url.set_path(resolve_path(u.escaped_path(), ref.escaped_path())) ?
return url
}
@ -994,8 +986,7 @@ pub fn (u &URL) request_uri() string {
if result == '' {
result = '/'
}
}
else {
} else {
if result.starts_with('//') {
result = u.scheme + ':' + result
}
@ -1011,21 +1002,21 @@ pub fn (u &URL) request_uri() string {
// If the result is enclosed in square brackets, as literal IPv6 addresses are,
// the square brackets are removed from the result.
pub fn (u &URL) hostname() string {
host,_ := split_host_port(u.host)
host, _ := split_host_port(u.host)
return host
}
// port returns the port part of u.host, without the leading colon.
// If u.host doesn't contain a port, port returns an empty string.
pub fn (u &URL) port() string {
_,port := split_host_port(u.host)
_, port := split_host_port(u.host)
return port
}
// split_host_port separates host and port. If the port is not valid, it returns
// the entire input as host, and it doesn't check the validity of the host.
// Per RFC 3986, it requires ports to be numeric.
fn split_host_port(hostport string) (string,string) {
fn split_host_port(hostport string) (string, string) {
mut host := hostport
mut port := ''
colon := host.last_index_byte(`:`)
@ -1036,7 +1027,7 @@ fn split_host_port(hostport string) (string,string) {
if host.starts_with('[') && host.ends_with(']') {
host = host[1..host.len - 1]
}
return host,port
return host, port
}
// valid_userinfo reports whether s is a valid userinfo string per RFC 3986
@ -1059,19 +1050,16 @@ pub fn valid_userinfo(s string) bool {
continue
}
match r {
`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` {
continue
}
else {
return false
}}
`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` { continue }
else { return false }
}
}
return true
}
// string_contains_ctl_byte reports whether s contains any ASCII control character.
fn string_contains_ctl_byte(s string) bool {
for i in 0..s.len {
for i in 0 .. s.len {
b := s[i]
if b < ` ` || b == 0x7f {
return true
@ -1083,11 +1071,9 @@ fn string_contains_ctl_byte(s string) bool {
pub fn ishex(c byte) bool {
if `0` <= c && c <= `9` {
return true
}
else if `a` <= c && c <= `f` {
} else if `a` <= c && c <= `f` {
return true
}
else if `A` <= c && c <= `F` {
} else if `A` <= c && c <= `F` {
return true
}
return false
@ -1096,11 +1082,9 @@ pub fn ishex(c byte) bool {
fn unhex(c byte) byte {
if `0` <= c && c <= `9` {
return c - `0`
}
else if `a` <= c && c <= `f` {
} else if `a` <= c && c <= `f` {
return c - `a` + 10
}
else if `A` <= c && c <= `F` {
} else if `A` <= c && c <= `F` {
return c - `A` + 10
}
return 0