decoder2: Add support for surrogates

This commit is contained in:
Larsimusrex 2025-08-29 20:42:20 +02:00
parent 7831fb0b82
commit f7f4ac4bb9
2 changed files with 79 additions and 2 deletions

View file

@ -412,11 +412,41 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
string_buffer << `\t` string_buffer << `\t`
} }
`u` { `u` {
string_buffer << rune(strconv.parse_uint(decoder.json[ unicode_point := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position + string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!).bytes() string_index + 4], 16, 32)!)
string_index += 4 string_index += 4
if unicode_point < 0xD800 { // normal utf-8
string_buffer << unicode_point.bytes()
} else if unicode_point >= 0xDC00 { // trail surrogate -> invalid
decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
} else { // head surrogate -> treat as utf-16
if string_index > string_info.length - 6 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
if decoder.json[string_info.position + string_index..
string_info.position + string_index + 2] != '\\u' {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
string_index += 2
unicode_point2 := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!)
string_index += 4
if unicode_point2 < 0xDC00 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point):04X}.')!
}
final_unicode_point := (unicode_point2 & 0x3FF) +
((unicode_point & 0x3FF) << 10) + 0x10000
string_buffer << final_unicode_point.bytes()
}
} }
else {} // has already been checked else {} // has already been checked
} }

View file

@ -10,3 +10,50 @@ fn test_decode_escaped_string() {
assert escaped_strings == decoded_strings assert escaped_strings == decoded_strings
} }
// test_surrogate checks that an escaped head/trail surrogate pair is decoded
// into the single character it encodes, both on its own and when followed
// by ordinary text.
fn test_surrogate() {
	pair_only := decoder2.decode[string](r'"\ud83d\ude00"')!
	assert pair_only == '😀'

	pair_then_text := decoder2.decode[string](r'"\ud83d\ude00 text"')!
	assert pair_then_text == '😀 text'
}
// assert_surrogate_decode_fails decodes `input` as a string and asserts that the
// decoder rejects it with a `decoder2.JsonDecodeError` reported at line 1,
// character 1, whose message is exactly `expected_message`.
fn assert_surrogate_decode_fails(input string, expected_message string) {
	if decoded := decoder2.decode[string](input) {
		assert false, 'expected ${input} to be rejected, but it decoded to: ${decoded}'
	} else {
		if err is decoder2.JsonDecodeError {
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		} else {
			// Without this branch an unexpected error type would skip every
			// assertion above and let the test pass without checking anything.
			assert false, 'expected a JsonDecodeError for ${input}, but got: ${err.msg()}'
		}
	}
}

// test_invalid_surrogate checks that malformed surrogate escape sequences are
// rejected with the matching decode error instead of being decoded.
fn test_invalid_surrogate() {
	// head surrogate at the very end of the string
	assert_surrogate_decode_fails(r'"\ud83d"', 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')

	// head surrogate followed by escapes that are not `\u` sequences
	assert_surrogate_decode_fails(r'"\ud83d\n\n\n\n"', 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')

	// head surrogate followed by a second head surrogate
	assert_surrogate_decode_fails(r'"\ud83d\ud83d"', 'Data: Expected a trail surrogate after a head surrogate, but got D83D.')

	// trail surrogate appearing before any head surrogate
	assert_surrogate_decode_fails(r'"\ude00\ud83d"', 'Data: Got trail surrogate: DE00 before head surrogate.')
}