diff --git a/vlib/x/json2/decoder2/decode.v b/vlib/x/json2/decoder2/decode.v
index b3900dfe51..d0a5d1b752 100644
--- a/vlib/x/json2/decoder2/decode.v
+++ b/vlib/x/json2/decoder2/decode.v
@@ -412,11 +412,41 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
 							string_buffer << `\t`
 						}
 						`u` {
-							string_buffer << rune(strconv.parse_uint(decoder.json[
+							unicode_point := rune(strconv.parse_uint(decoder.json[
 								string_info.position + string_index..string_info.position +
-								string_index + 4], 16, 32)!).bytes()
+								string_index + 4], 16, 32)!)
 
 							string_index += 4
+
+							if unicode_point < 0xD800 || unicode_point > 0xDFFF { // not a surrogate (incl. U+E000..U+FFFF) -> normal utf-8
+								string_buffer << unicode_point.bytes()
+							} else if unicode_point >= 0xDC00 { // trail surrogate (U+DC00..U+DFFF) without a head -> invalid
+								decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
+							} else { // head surrogate (U+D800..U+DBFF) -> treat as utf-16
+								if string_index > string_info.length - 6 { // no room left for another `\uXXXX` escape
+									decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
+								}
+								if decoder.json[string_info.position + string_index..
+									string_info.position + string_index + 2] != '\\u' {
+									decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
+								}
+
+								string_index += 2
+
+								unicode_point2 := rune(strconv.parse_uint(decoder.json[
+									string_info.position + string_index..string_info.position +
+									string_index + 4], 16, 32)!)
+
+								string_index += 4
+
+								if unicode_point2 < 0xDC00 || unicode_point2 > 0xDFFF { // second escape must be a trail surrogate
+									decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point2):04X}.')!
+								}
+
+								final_unicode_point := (unicode_point2 & 0x3FF) +
+									((unicode_point & 0x3FF) << 10) + 0x10000 // combine the two 10-bit halves, offset past the BMP
+								string_buffer << final_unicode_point.bytes()
+							}
 						}
 						else {} // has already been checked
 					}
diff --git a/vlib/x/json2/decoder2/tests/decode_escaped_string_test.v b/vlib/x/json2/decoder2/tests/decode_escaped_string_test.v
index e79a61c3fb..d3e437895a 100644
--- a/vlib/x/json2/decoder2/tests/decode_escaped_string_test.v
+++ b/vlib/x/json2/decoder2/tests/decode_escaped_string_test.v
@@ -10,3 +10,56 @@ fn test_decode_escaped_string() {
 
 	assert escaped_strings == decoded_strings
 }
+
+fn test_surrogate() {
+	assert decoder2.decode[string](r'"\ud83d\ude00"')! == '😀'
+	assert decoder2.decode[string](r'"\ud83d\ude00 text"')! == '😀 text'
+}
+
+fn test_invalid_surrogate() {
+	if x := decoder2.decode[string](r'"\ud83d"') {
+		assert false
+	} else {
+		if err is decoder2.JsonDecodeError {
+			assert err.line == 1
+			assert err.character == 1
+			assert err.message == 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.'
+		}
+	}
+
+	if x := decoder2.decode[string](r'"\ud83d\n\n\n\n"') {
+		assert false
+	} else {
+		if err is decoder2.JsonDecodeError {
+			assert err.line == 1
+			assert err.character == 1
+			assert err.message == 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.'
+		}
+	}
+
+	if x := decoder2.decode[string](r'"\ud83d\ud83d"') {
+		assert false
+	} else {
+		if err is decoder2.JsonDecodeError {
+			assert err.line == 1
+			assert err.character == 1
+			assert err.message == 'Data: Expected a trail surrogate after a head surrogate, but got D83D.'
+		}
+	}
+
+	if x := decoder2.decode[string](r'"\ude00\ud83d"') {
+		assert false
+	} else {
+		if err is decoder2.JsonDecodeError {
+			assert err.line == 1
+			assert err.character == 1
+			assert err.message == 'Data: Got trail surrogate: DE00 before head surrogate.'
+		}
+	}
+}
+
+fn test_bmp_above_surrogate_range() {
+	// U+E000..U+FFFF are ordinary BMP code points and must not be mistaken for trail surrogates
+	assert decoder2.decode[string](r'"\ufffd"')! == '\ufffd'
+	assert decoder2.decode[string](r'"\ue000 text"')! == '\ue000 text'
+}