mirror of
https://github.com/vlang/v.git
synced 2025-09-13 14:32:26 +03:00
decoder2: add support for decoding utf-16 surrogates, produced by some JSON encoder implementations (Python, Java, C#) (#25193)
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Shy and PV CI / v-compiles-puzzle-vibes (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Shy and PV CI / v-compiles-puzzle-vibes (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled
This commit is contained in:
parent
ae8134705b
commit
24f91280d9
2 changed files with 79 additions and 2 deletions
|
@ -412,11 +412,41 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
|
||||||
string_buffer << `\t`
|
string_buffer << `\t`
|
||||||
}
|
}
|
||||||
`u` {
|
`u` {
|
||||||
string_buffer << rune(strconv.parse_uint(decoder.json[
|
unicode_point := rune(strconv.parse_uint(decoder.json[
|
||||||
string_info.position + string_index..string_info.position +
|
string_info.position + string_index..string_info.position +
|
||||||
string_index + 4], 16, 32)!).bytes()
|
string_index + 4], 16, 32)!)
|
||||||
|
|
||||||
string_index += 4
|
string_index += 4
|
||||||
|
|
||||||
|
if unicode_point < 0xD800 { // normal utf-8
|
||||||
|
string_buffer << unicode_point.bytes()
|
||||||
|
} else if unicode_point >= 0xDC00 { // trail surrogate -> invalid
|
||||||
|
decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
|
||||||
|
} else { // head surrogate -> treat as utf-16
|
||||||
|
if string_index > string_info.length - 6 {
|
||||||
|
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
|
||||||
|
}
|
||||||
|
if decoder.json[string_info.position + string_index..
|
||||||
|
string_info.position + string_index + 2] != '\\u' {
|
||||||
|
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
|
||||||
|
}
|
||||||
|
|
||||||
|
string_index += 2
|
||||||
|
|
||||||
|
unicode_point2 := rune(strconv.parse_uint(decoder.json[
|
||||||
|
string_info.position + string_index..string_info.position +
|
||||||
|
string_index + 4], 16, 32)!)
|
||||||
|
|
||||||
|
string_index += 4
|
||||||
|
|
||||||
|
if unicode_point2 < 0xDC00 {
|
||||||
|
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point):04X}.')!
|
||||||
|
}
|
||||||
|
|
||||||
|
final_unicode_point := (unicode_point2 & 0x3FF) +
|
||||||
|
((unicode_point & 0x3FF) << 10) + 0x10000
|
||||||
|
string_buffer << final_unicode_point.bytes()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {} // has already been checked
|
else {} // has already been checked
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,3 +10,50 @@ fn test_decode_escaped_string() {
|
||||||
|
|
||||||
assert escaped_strings == decoded_strings
|
assert escaped_strings == decoded_strings
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test_surrogate verifies that a `\uXXXX\uXXXX` escaped UTF-16 surrogate pair
// is decoded into the single code point it represents, both when the pair is
// the whole string and when ordinary text follows it.
fn test_surrogate() {
	pair_only := decoder2.decode[string](r'"\ud83d\ude00"')!
	assert pair_only == '😀'

	pair_then_text := decoder2.decode[string](r'"\ud83d\ude00 text"')!
	assert pair_then_text == '😀 text'
}
|
||||||
|
|
||||||
|
// test_invalid_surrogate verifies that malformed UTF-16 surrogate escapes are
// rejected with a `decoder2.JsonDecodeError` reported at line 1, character 1,
// carrying the expected message.
//
// Each case is a `[json_input, expected_message]` pair:
//   * a head surrogate at the very end of the string,
//   * a head surrogate followed by escapes that are not `\u`,
//   * a head surrogate followed by another head surrogate,
//   * a trail surrogate that appears before any head surrogate.
//
// The test fails when decoding unexpectedly succeeds, and also when the error
// is of any type other than `decoder2.JsonDecodeError`, so a wrong error kind
// can no longer slip through unnoticed.
fn test_invalid_surrogate() {
	missing_trail := 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.'
	cases := [
		[r'"\ud83d"', missing_trail],
		[r'"\ud83d\n\n\n\n"', missing_trail],
		[r'"\ud83d\ud83d"',
			'Data: Expected a trail surrogate after a head surrogate, but got D83D.'],
		[r'"\ude00\ud83d"', 'Data: Got trail surrogate: DE00 before head surrogate.'],
	]

	for pair in cases {
		input := pair[0]
		expected_message := pair[1]

		if _ := decoder2.decode[string](input) {
			assert false, 'decoding ${input} should have failed'
		} else {
			if err is decoder2.JsonDecodeError {
				assert err.line == 1
				assert err.character == 1
				assert err.message == expected_message
			} else {
				// Any other error type means the decoder failed for the wrong reason.
				assert false, 'expected a JsonDecodeError for ${input}, got: ${err}'
			}
		}
	}
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue