decoder2: improve enum decoding; fix handling of required fields at the end of a json string (#25289)
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled

This commit is contained in:
Larsimusrex 2025-09-12 09:57:15 +02:00 committed by GitHub
parent 56f20d1ff8
commit a8d200ac0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 258 additions and 121 deletions

View file

@ -38,6 +38,16 @@ struct StruWithRequiredAttribute {
b int b int
} }
// Foo is a minimal fixture whose only field carries the `required` attribute.
// It is used by the test that follows to decode an object in which the
// required field is the last key of the JSON string.
struct Foo {
a int @[required]
}
// Decoding must succeed when the `required` field is the final key of the
// JSON object, and the decoded value must carry the field's value.
fn test_last_field_requiered() {
	expected := Foo{
		a: 0
	}
	decoded := json.decode[Foo]('{"a":0}')!
	assert decoded == expected
}
fn test_skip_and_rename_attributes() { fn test_skip_and_rename_attributes() {
assert json.decode[StruWithJsonAttribute]('{"name": "hola1", "a": 2, "b": 3}')! == StruWithJsonAttribute{ assert json.decode[StruWithJsonAttribute]('{"name": "hola1", "a": 2, "b": 3}')! == StruWithJsonAttribute{
a: 2 a: 2

View file

@ -38,7 +38,7 @@ struct StructFieldInfo {
is_required bool is_required bool
is_raw bool is_raw bool
mut: mut:
decoded_with_value_info_node &Node[ValueInfo] = unsafe { nil } is_decoded bool
} }
// Decoder represents a JSON decoder. // Decoder represents a JSON decoder.
@ -368,105 +368,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
} }
} }
$if T.unaliased_typ is string { $if T.unaliased_typ is string {
string_info := decoder.current_node.value decoder.decode_string(mut val)!
if string_info.value_kind == .string {
mut string_buffer := []u8{cap: string_info.length} // might be too long but most json strings don't contain many escape characters anyways
mut buffer_index := 1
mut string_index := 1
for string_index < string_info.length - 1 {
current_byte := decoder.json[string_info.position + string_index]
if current_byte == `\\` {
// push all characters up to this point
unsafe {
string_buffer.push_many(decoder.json.str + string_info.position +
buffer_index, string_index - buffer_index)
}
string_index++
escaped_char := decoder.json[string_info.position + string_index]
string_index++
match escaped_char {
`/`, `"`, `\\` {
string_buffer << escaped_char
}
`b` {
string_buffer << `\b`
}
`f` {
string_buffer << `\f`
}
`n` {
string_buffer << `\n`
}
`r` {
string_buffer << `\r`
}
`t` {
string_buffer << `\t`
}
`u` {
unicode_point := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!)
string_index += 4
if unicode_point < 0xD800 { // normal utf-8
string_buffer << unicode_point.bytes()
} else if unicode_point >= 0xDC00 { // trail surrogate -> invalid
decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
} else { // head surrogate -> treat as utf-16
if string_index > string_info.length - 6 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
if decoder.json[string_info.position + string_index..
string_info.position + string_index + 2] != '\\u' {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
string_index += 2
unicode_point2 := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!)
string_index += 4
if unicode_point2 < 0xDC00 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point):04X}.')!
}
final_unicode_point := (unicode_point2 & 0x3FF) +
((unicode_point & 0x3FF) << 10) + 0x10000
string_buffer << final_unicode_point.bytes()
}
}
else {} // has already been checked
}
buffer_index = string_index
} else {
string_index++
}
}
// push the rest
unsafe {
string_buffer.push_many(decoder.json.str + string_info.position + buffer_index,
string_index - buffer_index)
}
val = string_buffer.bytestr()
} else {
return decoder.decode_error('Expected string, but got ${string_info.value_kind}')
}
} $else $if T.unaliased_typ is $sumtype { } $else $if T.unaliased_typ is $sumtype {
decoder.decode_sumtype(mut val)! decoder.decode_sumtype(mut val)!
return return
@ -509,7 +411,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
for attr in field.attrs { for attr in field.attrs {
if attr.starts_with('json:') { if attr.starts_with('json:') {
if attr.len <= 6 { if attr.len <= 6 {
return decoder.decode_error('`json` attribute must have an argument') decoder.decode_error('`json` attribute must have an argument')!
} }
json_name_str = unsafe { attr.str + 6 } json_name_str = unsafe { attr.str + 6 }
json_name_len = attr.len - 6 json_name_len = attr.len - 6
@ -615,9 +517,9 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
if current_field_info.value.is_skip { if current_field_info.value.is_skip {
if current_field_info.value.is_required == false { if current_field_info.value.is_required == false {
return decoder.decode_error('This should not happen. Please, file a bug. `skip` field should not be processed here without a `required` attribute') decoder.decode_error('This should not happen. Please, file a bug. `skip` field should not be processed here without a `required` attribute')!
} }
current_field_info.value.decoded_with_value_info_node = decoder.current_node current_field_info.value.is_decoded = true
if decoder.current_node != unsafe { nil } { if decoder.current_node != unsafe { nil } {
decoder.current_node = decoder.current_node.next decoder.current_node = decoder.current_node.next
} }
@ -627,7 +529,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
if current_field_info.value.is_raw { if current_field_info.value.is_raw {
$if field.unaliased_typ is $enum { $if field.unaliased_typ is $enum {
// workaround to avoid the error: enums can only be assigned `int` values // workaround to avoid the error: enums can only be assigned `int` values
return decoder.decode_error('`raw` attribute cannot be used with enum fields') decoder.decode_error('`raw` attribute cannot be used with enum fields')!
} $else $if field.typ is ?string { } $else $if field.typ is ?string {
position := decoder.current_node.value.position position := decoder.current_node.value.position
end := position + decoder.current_node.value.length end := position + decoder.current_node.value.length
@ -659,7 +561,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
decoder.current_node = decoder.current_node.next decoder.current_node = decoder.current_node.next
} }
} $else { } $else {
return decoder.decode_error('`raw` attribute can only be used with string fields') decoder.decode_error('`raw` attribute can only be used with string fields')!
} }
} else { } else {
$if field.typ is $option { $if field.typ is $option {
@ -682,7 +584,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
decoder.decode_value(mut val.$(field.name))! decoder.decode_value(mut val.$(field.name))!
} }
} }
current_field_info.value.decoded_with_value_info_node = decoder.current_node current_field_info.value.is_decoded = true
break break
} }
} }
@ -705,15 +607,15 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
current_field_info = current_field_info.next current_field_info = current_field_info.next
continue continue
} }
if current_field_info.value.decoded_with_value_info_node == unsafe { nil } { if !current_field_info.value.is_decoded {
return decoder.decode_error('missing required field `${unsafe { decoder.decode_error('missing required field `${unsafe {
tos(current_field_info.value.field_name_str, current_field_info.value.field_name_len) tos(current_field_info.value.field_name_str, current_field_info.value.field_name_len)
}}`') }}`')!
} }
current_field_info = current_field_info.next current_field_info = current_field_info.next
} }
} else { } else {
return decoder.decode_error('Expected object, but got ${struct_info.value_kind}') decoder.decode_error('Expected object, but got ${struct_info.value_kind}')!
} }
unsafe { unsafe {
struct_fields_info.free() struct_fields_info.free()
@ -723,14 +625,14 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
value_info := decoder.current_node.value value_info := decoder.current_node.value
if value_info.value_kind != .boolean { if value_info.value_kind != .boolean {
return decoder.decode_error('Expected boolean, but got ${value_info.value_kind}') decoder.decode_error('Expected boolean, but got ${value_info.value_kind}')!
} }
unsafe { unsafe {
val = vmemcmp(decoder.json.str + value_info.position, true_in_string.str, val = vmemcmp(decoder.json.str + value_info.position, true_in_string.str,
true_in_string.len) == 0 true_in_string.len) == 0
} }
} $else $if T.unaliased_typ is $float || T.unaliased_typ is $int || T.unaliased_typ is $enum { } $else $if T.unaliased_typ is $float || T.unaliased_typ is $int {
value_info := decoder.current_node.value value_info := decoder.current_node.value
if value_info.value_kind == .number { if value_info.value_kind == .number {
@ -742,10 +644,12 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
unsafe { decoder.decode_number(&val)! } unsafe { decoder.decode_number(&val)! }
} else { } else {
return decoder.decode_error('Expected number, but got ${value_info.value_kind}') decoder.decode_error('Expected number, but got ${value_info.value_kind}')!
} }
} $else $if T.unaliased_typ is $enum {
decoder.decode_enum(mut val)!
} $else { } $else {
return decoder.decode_error('cannot decode value with ${typeof(val).name} type') decoder.decode_error('cannot decode value with ${typeof(val).name} type')!
} }
if decoder.current_node != unsafe { nil } { if decoder.current_node != unsafe { nil } {
@ -753,6 +657,108 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
} }
} }
// decode_string decodes the JSON string described by `decoder.current_node`
// into `val`, resolving escape sequences (`\n`, `\t`, `\uXXXX`, UTF-16
// surrogate pairs, ...) into their UTF-8 bytes.
//
// It returns an error when the current node is not a string, when a `\u`
// escape is a lone trail surrogate, or when a head surrogate is not followed
// by a valid `\uDC00`..`\uDFFF` trail surrogate.
// The function reads `decoder.current_node` but does not move it.
fn (mut decoder Decoder) decode_string[T](mut val T) ! {
	string_info := decoder.current_node.value

	if string_info.value_kind == .string {
		mut string_buffer := []u8{cap: string_info.length} // might be too long but most json strings don't contain many escape characters anyways

		// Both indices are relative to string_info.position. They start at 1 and the
		// loop stops before length - 1, i.e. the first and last byte (presumably the
		// surrounding quotes) are never copied.
		// buffer_index marks the start of the not-yet-copied run of plain bytes.
		mut buffer_index := 1
		mut string_index := 1

		for string_index < string_info.length - 1 {
			current_byte := decoder.json[string_info.position + string_index]

			if current_byte == `\\` {
				// push all characters up to this point
				unsafe {
					string_buffer.push_many(decoder.json.str + string_info.position + buffer_index,
						string_index - buffer_index)
				}

				string_index++

				escaped_char := decoder.json[string_info.position + string_index]

				string_index++

				match escaped_char {
					`/`, `"`, `\\` {
						string_buffer << escaped_char
					}
					`b` {
						string_buffer << `\b`
					}
					`f` {
						string_buffer << `\f`
					}
					`n` {
						string_buffer << `\n`
					}
					`r` {
						string_buffer << `\r`
					}
					`t` {
						string_buffer << `\t`
					}
					`u` {
						unicode_point := rune(strconv.parse_uint(decoder.json[
							string_info.position + string_index..string_info.position +
							string_index + 4], 16, 32)!)

						string_index += 4

						if unicode_point < 0xD800 || unicode_point > 0xDFFF { // normal utf-8
							string_buffer << unicode_point.bytes()
						} else if unicode_point >= 0xDC00 { // trail surrogate -> invalid
							decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
						} else { // head surrogate -> treat as utf-16
							if string_index > string_info.length - 6 {
								decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
							}
							if decoder.json[string_info.position + string_index..
								string_info.position + string_index + 2] != '\\u' {
								decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
							}

							string_index += 2

							unicode_point2 := rune(strconv.parse_uint(decoder.json[
								string_info.position + string_index..string_info.position +
								string_index + 4], 16, 32)!)

							string_index += 4

							// A trail surrogate must lie in DC00..DFFF; anything else (another
							// head surrogate, or a regular code point) cannot complete the pair.
							// The message reports the offending second escape, not the head.
							if unicode_point2 < 0xDC00 || unicode_point2 > 0xDFFF {
								decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point2):04X}.')!
							}

							// Combine the two 10 bit halves of the UTF-16 pair.
							final_unicode_point := (unicode_point2 & 0x3FF) +
								((unicode_point & 0x3FF) << 10) + 0x10000
							string_buffer << final_unicode_point.bytes()
						}
					}
					else {} // has already been checked
				}

				buffer_index = string_index
			} else {
				string_index++
			}
		}

		// push the rest
		unsafe {
			string_buffer.push_many(decoder.json.str + string_info.position + buffer_index,
				string_index - buffer_index)
		}

		val = string_buffer.bytestr()
	} else {
		decoder.decode_error('Expected string, but got ${string_info.value_kind}')!
	}
}
fn (mut decoder Decoder) decode_array[T](mut val []T) ! { fn (mut decoder Decoder) decode_array[T](mut val []T) ! {
array_info := decoder.current_node.value array_info := decoder.current_node.value
@ -775,7 +781,7 @@ fn (mut decoder Decoder) decode_array[T](mut val []T) ! {
val << array_element val << array_element
} }
} else { } else {
return decoder.decode_error('Expected array, but got ${array_info.value_kind}') decoder.decode_error('Expected array, but got ${array_info.value_kind}')!
} }
} }
@ -819,7 +825,7 @@ fn (mut decoder Decoder) decode_map[K, V](mut val map[K]V) ! {
decoder.decode_value(mut val[key])! decoder.decode_value(mut val[key])!
} }
} else { } else {
return decoder.decode_error('Expected object, but got ${map_info.value_kind}') decoder.decode_error('Expected object, but got ${map_info.value_kind}')!
} }
} }
@ -889,6 +895,36 @@ fn get_number_digits[T](num T) int {
} }
} }
// decode_enum decodes the value at `decoder.current_node` into the enum `val`.
//
// A JSON number is matched against the integer values of the enum fields,
// a JSON string against the field names. An error is returned when nothing
// matches, or when the node is neither a number nor a string.
// The function reads `decoder.current_node` but does not move it.
fn (mut decoder Decoder) decode_enum[T](mut val T) ! {
	enum_info := decoder.current_node.value

	if enum_info.value_kind == .number {
		mut result := 0
		unsafe { decoder.decode_number(&result)! }

		$for value in T.values {
			if int(value.value) == result {
				val = value.value
				return
			}
		}
		decoder.decode_error('Number value: `${result}` does not match any field in enum: ${typeof(val).name}')!
	} else if enum_info.value_kind == .string {
		mut result := ''
		// Use decode_string rather than decode_value here: decode_value moves
		// `current_node` forward once it is done, and the decode_value call that
		// dispatched to this function is expected to do the same afterwards, so a
		// string-encoded enum would otherwise skip the node that follows it.
		decoder.decode_string(mut result)!

		$for value in T.values {
			if value.name == result {
				val = value.value
				return
			}
		}
		decoder.decode_error('String value: `${result}` does not match any field in enum: ${typeof(val).name}')!
	}

	decoder.decode_error('Expected number or string value for enum, got: ${enum_info.value_kind}')!
}
// use pointer instead of mut so enum cast works // use pointer instead of mut so enum cast works
@[unsafe] @[unsafe]
fn (mut decoder Decoder) decode_number[T](val &T) ! { fn (mut decoder Decoder) decode_number[T](val &T) ! {
@ -904,10 +940,6 @@ fn (mut decoder Decoder) decode_number[T](val &T) ! {
$if T.unaliased_typ is $float { $if T.unaliased_typ is $float {
*val = T(strconv.atof_quick(decoder.json[number_info.position..number_info.position + *val = T(strconv.atof_quick(decoder.json[number_info.position..number_info.position +
number_info.length])) number_info.length]))
} $else $if T.unaliased_typ is $enum {
mut result := 0
decoder.decode_number(&result)!
*val = T(result)
} $else { // this part is a minefield } $else { // this part is a minefield
mut is_negative := false mut is_negative := false
mut index := 0 mut index := 0

View file

@ -0,0 +1,95 @@
import x.json2.decoder2 as json
// Bar is the enum fixture for the decoding tests in this file.
// `a` and `b` take the implicit values 0 and 1 (see test_number_decode),
// while `c` is set explicitly to 10, so the values 2..9 match no field.
enum Bar {
a
b
c = 10
}
// BarAlias lets the tests check that decoding also works through a type alias of the enum.
type BarAlias = Bar
// JSON numbers that equal a declared enum value decode to the matching
// field, both for the enum itself and for its alias.
fn test_number_decode() {
	// plain enum
	bar_zero := json.decode[Bar]('0')!
	assert bar_zero == Bar.a
	bar_one := json.decode[Bar]('1')!
	assert bar_one == Bar.b
	bar_ten := json.decode[Bar]('10')!
	assert bar_ten == Bar.c

	// alias of the enum
	alias_zero := json.decode[BarAlias]('0')!
	assert alias_zero == Bar.a
	alias_one := json.decode[BarAlias]('1')!
	assert alias_one == Bar.b
	alias_ten := json.decode[BarAlias]('10')!
	assert alias_ten == Bar.c
}
// A number that matches no enum value must be rejected, and the reported
// error must point at line 1, character 1 and name the target type.
fn test_number_decode_fails() {
	// plain enum
	if _ := json.decode[Bar]('2') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: Number value: `2` does not match any field in enum: &Bar'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}

	// alias of the enum
	if _ := json.decode[BarAlias]('2') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: Number value: `2` does not match any field in enum: &BarAlias'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}
}
// JSON strings that equal a field name decode to that field, both for the
// enum itself and for its alias.
fn test_string_decode() {
	// plain enum
	bar_a := json.decode[Bar]('"a"')!
	assert bar_a == Bar.a
	bar_b := json.decode[Bar]('"b"')!
	assert bar_b == Bar.b
	bar_c := json.decode[Bar]('"c"')!
	assert bar_c == Bar.c

	// alias of the enum
	alias_a := json.decode[BarAlias]('"a"')!
	assert alias_a == Bar.a
	alias_b := json.decode[BarAlias]('"b"')!
	assert alias_b == Bar.b
	alias_c := json.decode[BarAlias]('"c"')!
	assert alias_c == Bar.c
}
// A string that names no enum field must be rejected, and the reported
// error must point at line 1, character 1 and name the target type.
fn test_string_decode_fails() {
	// plain enum
	if _ := json.decode[Bar]('"d"') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: String value: `d` does not match any field in enum: &Bar'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}

	// alias of the enum
	if _ := json.decode[BarAlias]('"d"') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: String value: `d` does not match any field in enum: &BarAlias'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}
}
// A JSON value that is neither a number nor a string (here: a boolean) must
// be rejected with an error at line 1, character 1.
fn test_invalid_decode_fails() {
	// plain enum
	if _ := json.decode[Bar]('true') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: Expected number or string value for enum, got: boolean'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}

	// alias of the enum
	if _ := json.decode[BarAlias]('true') {
		assert false
	} else {
		if err is json.JsonDecodeError {
			expected_message := 'Data: Expected number or string value for enum, got: boolean'
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		}
	}
}

View file

@ -3,7 +3,7 @@ import x.json2.decoder2
fn test_decode_escaped_string() { fn test_decode_escaped_string() {
escaped_strings := ['test', 'test\\sd', 'test\nsd', '\ntest', 'test\\"', 'test\\', 'test\u1234ps', escaped_strings := ['test', 'test\\sd', 'test\nsd', '\ntest', 'test\\"', 'test\\', 'test\u1234ps',
'test\u1234', '\u1234\\\t"', ''] 'test\u1234', '\u1234\\\t"', '', '\uff0f', 'test \uff0f test', '😀', 'text 😀 text']
json_string := json2.encode[[]string](escaped_strings) json_string := json2.encode[[]string](escaped_strings)
decoded_strings := decoder2.decode[[]string](json_string)! decoded_strings := decoder2.decode[[]string](json_string)!