mirror of
https://github.com/vlang/v.git
synced 2025-09-13 22:42:26 +03:00
This commit is contained in:
parent
680b0d463a
commit
0df6fcce8c
2 changed files with 65 additions and 17 deletions
|
@ -1067,22 +1067,57 @@ fn test_split_into_lines() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_string_literal_with_backslash() {
|
const single_backslash = '\\'
|
||||||
a := 'HelloWorld'
|
const double_backslash = '\\\\'
|
||||||
|
const newline = '\n'
|
||||||
|
|
||||||
|
// vfmt off
|
||||||
|
fn test_string_literal_with_backslash_followed_by_newline() {
|
||||||
|
// Note `\` is followed *directly* by a newline, then some more whitespace, then a non whitespace string.
|
||||||
|
// In this case, the \ is treated as line breaking, and the whitespace after that on the new line,
|
||||||
|
// should be just ignored.
|
||||||
|
//
|
||||||
|
// See also https://doc.rust-lang.org/reference/tokens.html#string-literals
|
||||||
|
// >> Both byte sequences are normally translated to U+000A, but as a special exception,
|
||||||
|
// when an unescaped U+005C character occurs immediately before the line-break,
|
||||||
|
// the U+005C character, the line-break, and all whitespace at the beginning of the
|
||||||
|
// next line are ignored.
|
||||||
|
a := 'Hello\
|
||||||
|
World'
|
||||||
assert a == 'HelloWorld'
|
assert a == 'HelloWorld'
|
||||||
|
|
||||||
b := 'OneTwoThree'
|
// Here, `\\\` means `\\` followed by `\`, followed by a newline.
|
||||||
assert b == 'OneTwoThree'
|
// the first is a single escaped \, that should go into the literal, the second together with
|
||||||
}
|
// the newline and the whitespace after it, is a line-break, and should be simply ignored.
|
||||||
|
// Same with `\\\\\`, which is `\\\\`, followed by `\`, i.e. an escaped double backslash,
|
||||||
|
// and a line-break after it:
|
||||||
|
b := 'One \
|
||||||
|
Two Three \\\
|
||||||
|
Four \\\\
|
||||||
|
Five \\\\\
|
||||||
|
end'
|
||||||
|
assert b == 'One Two Three ${single_backslash}Four ${double_backslash}${newline} Five ${double_backslash}end'
|
||||||
|
|
||||||
|
// Note `\\` is followed *directly* by a newline, but `\\` is just an escape for `\`,
|
||||||
|
// and thus the newline has no special meaning, and should go into the string literal.
|
||||||
|
c := 'Hello\\
|
||||||
|
World'
|
||||||
|
assert c == 'Hello\\\n World'
|
||||||
|
|
||||||
|
d := 'One\\
|
||||||
|
Two Three \\
|
||||||
|
Four'
|
||||||
|
assert d == 'One\\\n Two Three \\\n Four'
|
||||||
|
}
|
||||||
|
// vfmt on
|
||||||
|
|
||||||
/*
|
|
||||||
type MyString = string
|
type MyString = string
|
||||||
|
|
||||||
fn test_string_alias() {
|
fn test_string_alias() {
|
||||||
s := MyString('hi')
|
s := MyString('hi')
|
||||||
ss := s + '!'
|
ss := s + '!'
|
||||||
|
assert ss == 'hi!'
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
// sort an array of structs, by their string field values
|
// sort an array of structs, by their string field values
|
||||||
|
|
||||||
|
|
|
@ -1239,7 +1239,7 @@ pub fn (mut s Scanner) ident_string() string {
|
||||||
backslash_count++
|
backslash_count++
|
||||||
}
|
}
|
||||||
// end of string
|
// end of string
|
||||||
if c == s.quote && (is_raw || backslash_count % 2 == 0) {
|
if c == s.quote && (is_raw || backslash_count & 1 == 0) {
|
||||||
// handle '123\\' backslash at the end
|
// handle '123\\' backslash at the end
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -1253,7 +1253,7 @@ pub fn (mut s Scanner) ident_string() string {
|
||||||
s.inc_line_number()
|
s.inc_line_number()
|
||||||
}
|
}
|
||||||
// Escape `\x` `\u` `\U`
|
// Escape `\x` `\u` `\U`
|
||||||
if backslash_count % 2 == 1 && !is_raw && !is_cstr {
|
if backslash_count & 1 == 1 && !is_raw && !is_cstr {
|
||||||
// Escape `\x`
|
// Escape `\x`
|
||||||
if c == `x` {
|
if c == `x` {
|
||||||
if s.text[s.pos + 1] == s.quote || !(s.text[s.pos + 1].is_hex_digit()
|
if s.text[s.pos + 1] == s.quote || !(s.text[s.pos + 1].is_hex_digit()
|
||||||
|
@ -1287,13 +1287,13 @@ pub fn (mut s Scanner) ident_string() string {
|
||||||
u32_escapes_pos << s.pos - 1
|
u32_escapes_pos << s.pos - 1
|
||||||
}
|
}
|
||||||
// Unknown escape sequence
|
// Unknown escape sequence
|
||||||
if !is_escape_sequence(c) && !c.is_digit() {
|
if !is_escape_sequence(c) && !c.is_digit() && c != `\n` {
|
||||||
s.error('`${c.ascii_str()}` unknown escape sequence')
|
s.error('`${c.ascii_str()}` unknown escape sequence')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ${var} (ignore in vfmt mode) (skip \$)
|
// ${var} (ignore in vfmt mode) (skip \$)
|
||||||
if prevc == `$` && c == `{` && !is_raw
|
if prevc == `$` && c == `{` && !is_raw
|
||||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) % 2 == 0 {
|
&& s.count_symbol_before(s.pos - 2, scanner.backslash) & 1 == 0 {
|
||||||
s.is_inside_string = true
|
s.is_inside_string = true
|
||||||
if s.is_enclosed_inter {
|
if s.is_enclosed_inter {
|
||||||
s.is_nested_enclosed_inter = true
|
s.is_nested_enclosed_inter = true
|
||||||
|
@ -1306,7 +1306,7 @@ pub fn (mut s Scanner) ident_string() string {
|
||||||
}
|
}
|
||||||
// $var
|
// $var
|
||||||
if prevc == `$` && util.is_name_char(c) && !is_raw
|
if prevc == `$` && util.is_name_char(c) && !is_raw
|
||||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) % 2 == 0 {
|
&& s.count_symbol_before(s.pos - 2, scanner.backslash) & 1 == 0 {
|
||||||
s.is_inside_string = true
|
s.is_inside_string = true
|
||||||
s.is_inter_start = true
|
s.is_inter_start = true
|
||||||
s.pos -= 2
|
s.pos -= 2
|
||||||
|
@ -1483,13 +1483,26 @@ fn trim_slash_line_break(s string) string {
|
||||||
mut start := 0
|
mut start := 0
|
||||||
mut ret_str := s
|
mut ret_str := s
|
||||||
for {
|
for {
|
||||||
|
// find the position of the first `\` followed by a newline, after `start`:
|
||||||
idx := ret_str.index_after('\\\n', start)
|
idx := ret_str.index_after('\\\n', start)
|
||||||
if idx != -1 {
|
if idx == -1 {
|
||||||
ret_str = ret_str[..idx] + ret_str[idx + 2..].trim_left(' \n\t\v\f\r')
|
|
||||||
start = idx
|
|
||||||
} else {
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
start = idx
|
||||||
|
// Here, ret_str[idx] is \, and ret_str[idx+1] is newline.
|
||||||
|
// Depending on the number of backslashes before the newline, we should either
|
||||||
|
// treat the last one and the whitespace after it as line-break, or just ignore it:
|
||||||
|
mut nbackslashes := 0
|
||||||
|
for eidx := idx; eidx >= 0 && ret_str[eidx] == `\\`; eidx-- {
|
||||||
|
nbackslashes++
|
||||||
|
}
|
||||||
|
// eprintln('>> start: ${start:-5} | nbackslashes: ${nbackslashes:-5} | ret_str: $ret_str')
|
||||||
|
if idx == 0 || (nbackslashes & 1) == 1 {
|
||||||
|
ret_str = ret_str[..idx] + ret_str[idx + 2..].trim_left(' \n\t\v\f\r')
|
||||||
|
} else {
|
||||||
|
// ensure the loop will terminate, when we could not strip anything:
|
||||||
|
start++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ret_str
|
return ret_str
|
||||||
}
|
}
|
||||||
|
@ -1560,7 +1573,7 @@ pub fn (mut s Scanner) ident_char() string {
|
||||||
// e.g. (octal) \141 (hex) \x61 or (unicode) \u2605 or (32 bit unicode) \U00002605
|
// e.g. (octal) \141 (hex) \x61 or (unicode) \u2605 or (32 bit unicode) \U00002605
|
||||||
// we don't handle binary escape codes in rune literals
|
// we don't handle binary escape codes in rune literals
|
||||||
orig := c
|
orig := c
|
||||||
if c.len % 2 == 0
|
if c.len & 1 == 0
|
||||||
&& (escaped_hex || escaped_unicode_16 || escaped_unicode_32 || escaped_octal) {
|
&& (escaped_hex || escaped_unicode_16 || escaped_unicode_32 || escaped_octal) {
|
||||||
if escaped_unicode_16 {
|
if escaped_unicode_16 {
|
||||||
// there can only be one, so attempt to decode it now
|
// there can only be one, so attempt to decode it now
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue