mirror of
https://github.com/vlang/v.git
synced 2025-09-13 22:42:26 +03:00
This commit is contained in:
parent
680b0d463a
commit
0df6fcce8c
2 changed files with 65 additions and 17 deletions
|
@ -1067,22 +1067,57 @@ fn test_split_into_lines() {
|
|||
}
|
||||
}
|
||||
|
||||
fn test_string_literal_with_backslash() {
|
||||
a := 'HelloWorld'
|
||||
const single_backslash = '\\'
|
||||
const double_backslash = '\\\\'
|
||||
const newline = '\n'
|
||||
|
||||
// vfmt off
|
||||
fn test_string_literal_with_backslash_followed_by_newline() {
|
||||
// Note: `\` is followed *directly* by a newline, then some more whitespace, then a non-whitespace string.
|
||||
// In this case, the `\` is treated as a line-break, and the whitespace after it on the new line
|
||||
// should be just ignored.
|
||||
//
|
||||
// See also https://doc.rust-lang.org/reference/tokens.html#string-literals
|
||||
// >> Both byte sequences are normally translated to U+000A, but as a special exception,
|
||||
// when an unescaped U+005C character occurs immediately before the line-break,
|
||||
// the U+005C character, the line-break, and all whitespace at the beginning of the
|
||||
// next line are ignored.
|
||||
a := 'Hello\
|
||||
World'
|
||||
assert a == 'HelloWorld'
|
||||
|
||||
b := 'OneTwoThree'
|
||||
assert b == 'OneTwoThree'
|
||||
}
|
||||
// Here, `\\\` means `\\` followed by `\`, followed by a newline.
|
||||
// The first is a single escaped `\`, which should go into the literal; the second, together with
|
||||
// the newline and the whitespace after it, is a line-break, and should be simply ignored.
|
||||
// Same with `\\\\\`, which is `\\\\`, followed by `\`, i.e. an escaped double backslash,
|
||||
// and a line-break after it:
|
||||
b := 'One \
|
||||
Two Three \\\
|
||||
Four \\\\
|
||||
Five \\\\\
|
||||
end'
|
||||
assert b == 'One Two Three ${single_backslash}Four ${double_backslash}${newline} Five ${double_backslash}end'
|
||||
|
||||
// Note `\\` is followed *directly* by a newline, but `\\` is just an escape for `\`,
|
||||
// and thus the newline has no special meaning, and should go into the string literal.
|
||||
c := 'Hello\\
|
||||
World'
|
||||
assert c == 'Hello\\\n World'
|
||||
|
||||
d := 'One\\
|
||||
Two Three \\
|
||||
Four'
|
||||
assert d == 'One\\\n Two Three \\\n Four'
|
||||
}
|
||||
// vfmt on
|
||||
|
||||
/*
|
||||
type MyString = string
|
||||
|
||||
fn test_string_alias() {
|
||||
s := MyString('hi')
|
||||
ss := s + '!'
|
||||
assert ss == 'hi!'
|
||||
}
|
||||
*/
|
||||
|
||||
// sort an array of structs, by their string field values
|
||||
|
||||
|
|
|
@ -1239,7 +1239,7 @@ pub fn (mut s Scanner) ident_string() string {
|
|||
backslash_count++
|
||||
}
|
||||
// end of string
|
||||
if c == s.quote && (is_raw || backslash_count % 2 == 0) {
|
||||
if c == s.quote && (is_raw || backslash_count & 1 == 0) {
|
||||
// handle '123\\' backslash at the end
|
||||
break
|
||||
}
|
||||
|
@ -1253,7 +1253,7 @@ pub fn (mut s Scanner) ident_string() string {
|
|||
s.inc_line_number()
|
||||
}
|
||||
// Escape `\x` `\u` `\U`
|
||||
if backslash_count % 2 == 1 && !is_raw && !is_cstr {
|
||||
if backslash_count & 1 == 1 && !is_raw && !is_cstr {
|
||||
// Escape `\x`
|
||||
if c == `x` {
|
||||
if s.text[s.pos + 1] == s.quote || !(s.text[s.pos + 1].is_hex_digit()
|
||||
|
@ -1287,13 +1287,13 @@ pub fn (mut s Scanner) ident_string() string {
|
|||
u32_escapes_pos << s.pos - 1
|
||||
}
|
||||
// Unknown escape sequence
|
||||
if !is_escape_sequence(c) && !c.is_digit() {
|
||||
if !is_escape_sequence(c) && !c.is_digit() && c != `\n` {
|
||||
s.error('`${c.ascii_str()}` unknown escape sequence')
|
||||
}
|
||||
}
|
||||
// ${var} (ignore in vfmt mode) (skip \$)
|
||||
if prevc == `$` && c == `{` && !is_raw
|
||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) % 2 == 0 {
|
||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) & 1 == 0 {
|
||||
s.is_inside_string = true
|
||||
if s.is_enclosed_inter {
|
||||
s.is_nested_enclosed_inter = true
|
||||
|
@ -1306,7 +1306,7 @@ pub fn (mut s Scanner) ident_string() string {
|
|||
}
|
||||
// $var
|
||||
if prevc == `$` && util.is_name_char(c) && !is_raw
|
||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) % 2 == 0 {
|
||||
&& s.count_symbol_before(s.pos - 2, scanner.backslash) & 1 == 0 {
|
||||
s.is_inside_string = true
|
||||
s.is_inter_start = true
|
||||
s.pos -= 2
|
||||
|
@ -1483,13 +1483,26 @@ fn trim_slash_line_break(s string) string {
|
|||
mut start := 0
|
||||
mut ret_str := s
|
||||
for {
|
||||
// find the position of the first `\` followed by a newline, after `start`:
|
||||
idx := ret_str.index_after('\\\n', start)
|
||||
if idx != -1 {
|
||||
ret_str = ret_str[..idx] + ret_str[idx + 2..].trim_left(' \n\t\v\f\r')
|
||||
start = idx
|
||||
} else {
|
||||
if idx == -1 {
|
||||
break
|
||||
}
|
||||
start = idx
|
||||
// Here, ret_str[idx] is \, and ret_str[idx+1] is newline.
|
||||
// Depending on the number of backslashes before the newline, we should either
|
||||
// treat the last one, the newline and the whitespace after it as a line-break (and strip them), or leave the text unchanged:
|
||||
mut nbackslashes := 0
|
||||
for eidx := idx; eidx >= 0 && ret_str[eidx] == `\\`; eidx-- {
|
||||
nbackslashes++
|
||||
}
|
||||
// eprintln('>> start: ${start:-5} | nbackslashes: ${nbackslashes:-5} | ret_str: $ret_str')
|
||||
if idx == 0 || (nbackslashes & 1) == 1 {
|
||||
ret_str = ret_str[..idx] + ret_str[idx + 2..].trim_left(' \n\t\v\f\r')
|
||||
} else {
|
||||
// ensure the loop will terminate, when we could not strip anything:
|
||||
start++
|
||||
}
|
||||
}
|
||||
return ret_str
|
||||
}
|
||||
|
@ -1560,7 +1573,7 @@ pub fn (mut s Scanner) ident_char() string {
|
|||
// e.g. (octal) \141 (hex) \x61 or (unicode) \u2605 or (32 bit unicode) \U00002605
|
||||
// we don't handle binary escape codes in rune literals
|
||||
orig := c
|
||||
if c.len % 2 == 0
|
||||
if c.len & 1 == 0
|
||||
&& (escaped_hex || escaped_unicode_16 || escaped_unicode_32 || escaped_octal) {
|
||||
if escaped_unicode_16 {
|
||||
// there can only be one, so attempt to decode it now
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue