decoder2: improve checker with better EOF detection (#25075)
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Shy and PV CI / v-compiles-puzzle-vibes (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled
json encoder benchmark CI / json-encode-benchmark (push) Has been cancelled

This commit is contained in:
Larsimusrex 2025-08-09 15:38:58 +02:00 committed by GitHub
parent 9140c9f844
commit a11de7263f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 140 additions and 230 deletions

View file

@ -1,14 +1,28 @@
module decoder2 module decoder2
// increment moves the checker forward by one byte, reporting EOF instead of
// stepping past the end of `checker.json`.
//
// When no byte follows the current position the call fails:
// - with an empty `message` it returns a bare `Error{}`, which lets callers stop
//   quietly via `checker.increment('') or { return }`;
// - otherwise it reports 'EOF: ' + message through `checker_error`.
@[inline]
fn (mut checker Decoder) increment(message string) ! {
	// `>=` rather than `==`: an index that is already at or beyond the last byte
	// (e.g. an empty input, where `json.len` is 0) must also be treated as EOF,
	// not incremented further out of range.
	if checker.checker_idx + 1 >= checker.json.len {
		if message == '' {
			return Error{}
		}
		// NOTE(review): `checker_error` is defined elsewhere; presumably it always
		// returns an error, so the increment below is not reached on this path — confirm.
		checker.checker_error('EOF: ' + message)!
	}
	checker.checker_idx++
}
// skip_whitespace advances the checker while the current byte is one of
// `whitespace_chars`, stopping on the first byte that is not.
// Every step goes through `increment`, so reaching the end of the input while
// still on whitespace fails the same way `increment` does for the given `message`.
@[inline]
fn (mut checker Decoder) skip_whitespace(message string) ! {
	for {
		if checker.json[checker.checker_idx] !in whitespace_chars {
			break
		}
		checker.increment(message)!
	}
}
// check_json_format checks if the JSON string is valid and updates the decoder state. // check_json_format checks if the JSON string is valid and updates the decoder state.
fn (mut checker Decoder) check_json_format() ! { fn (mut checker Decoder) check_json_format() ! {
// skip whitespace checker.skip_whitespace('empty json')!
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
break
}
checker.checker_idx++
}
start_idx_position := checker.checker_idx start_idx_position := checker.checker_idx
@ -18,7 +32,7 @@ fn (mut checker Decoder) check_json_format() ! {
`"` { `"` {
checker.values_info.push(ValueInfo{ checker.values_info.push(ValueInfo{
position: checker.checker_idx position: checker.checker_idx
value_kind: .string_ value_kind: .string
}) })
actual_value_info_pointer = checker.values_info.last() actual_value_info_pointer = checker.values_info.last()
@ -82,54 +96,36 @@ fn (mut checker Decoder) check_json_format() ! {
actual_value_info_pointer.length = checker.checker_idx + 1 - start_idx_position actual_value_info_pointer.length = checker.checker_idx + 1 - start_idx_position
if checker.checker_idx < checker.json.len { checker.increment('') or { return }
checker.checker_idx++ checker.skip_whitespace('') or { return }
}
for checker.checker_idx < checker.json.len if checker.json[checker.checker_idx] !in [`,`, `:`, `}`, `]`] {
&& checker.json[checker.checker_idx] !in [`,`, `:`, `}`, `]`] {
// get trash characters after the value
if checker.json[checker.checker_idx] !in whitespace_chars {
checker.checker_error('invalid value. Unexpected character after ${actual_value_info_pointer.value_kind} end')! checker.checker_error('invalid value. Unexpected character after ${actual_value_info_pointer.value_kind} end')!
} else {
// whitespace
}
checker.checker_idx++
} }
} }
fn (mut checker Decoder) check_string() ! { fn (mut checker Decoder) check_string() ! {
// check if the JSON string is a valid string checker.increment('string not closed')!
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: string not closed')
}
checker.checker_idx++
// check if the JSON string is a valid escape sequence // check if the JSON string is a valid escape sequence
for checker.json[checker.checker_idx] != `"` { for checker.json[checker.checker_idx] != `"` {
if checker.json[checker.checker_idx] == `\\` { if checker.json[checker.checker_idx] == `\\` {
if checker.checker_idx + 1 >= checker.json.len - 1 { checker.increment('invalid escape sequence')!
return checker.checker_error('invalid escape sequence') escaped_char := checker.json[checker.checker_idx]
}
escaped_char := checker.json[checker.checker_idx + 1]
match escaped_char { match escaped_char {
`/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` { `/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` {}
checker.checker_idx++ // make sure escaped quotation marks are skipped
}
`u` { `u` {
// check if the JSON string is a valid unicode escape sequence // check if the JSON string is a valid unicode escape sequence
escaped_char_last_index := checker.checker_idx + 5 escaped_char_last_index := checker.checker_idx + 4
if escaped_char_last_index < checker.json.len { if escaped_char_last_index < checker.json.len {
// 2 bytes for the unicode escape sequence `\u` // 2 bytes for the unicode escape sequence `\u`
checker.checker_idx += 2 checker.increment('invalid escape sequence')!
for checker.checker_idx < escaped_char_last_index { for checker.checker_idx < escaped_char_last_index {
match checker.json[checker.checker_idx] { match checker.json[checker.checker_idx] {
`0`...`9`, `a`...`f`, `A`...`F` { `0`...`9`, `a`...`f`, `A`...`F` {
checker.checker_idx++ checker.increment('invalid unicode escape sequence')!
} }
else { else {
return checker.checker_error('invalid unicode escape sequence') return checker.checker_error('invalid unicode escape sequence')
@ -138,7 +134,7 @@ fn (mut checker Decoder) check_string() ! {
} }
continue continue
} else { } else {
return checker.checker_error('short unicode escape sequence ${checker.json[checker.checker_idx..escaped_char_last_index]}') return checker.checker_error('short unicode escape sequence ${checker.json[checker.checker_idx - 1..checker.json.len - 1]}')
} }
} }
else { else {
@ -146,81 +142,57 @@ fn (mut checker Decoder) check_string() ! {
} }
} }
} }
checker.checker_idx++ checker.increment('string not closed')!
} }
} }
fn (mut checker Decoder) check_number() ! { fn (mut checker Decoder) check_number() ! {
// check if the JSON string is a valid float or integer // check if the JSON string is a valid float or integer
if checker.json[checker.checker_idx] == `-` { if checker.json[checker.checker_idx] == `-` {
checker.checker_idx++ checker.increment('expected digit')!
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
} }
// integer part // integer part
if checker.json[checker.checker_idx] == `0` { if checker.json[checker.checker_idx] == `0` {
checker.checker_idx++ checker.increment('') or { return }
} else if checker.json[checker.checker_idx] >= `1` && checker.json[checker.checker_idx] <= `9` { } else if checker.json[checker.checker_idx] >= `1` && checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++ checker.increment('') or { return }
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0` for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
&& checker.json[checker.checker_idx] <= `9` { checker.increment('') or { return }
checker.checker_idx++
} }
} else { } else {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}') return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
} }
// fraction part // fraction part
if checker.checker_idx != checker.json.len && checker.json[checker.checker_idx] == `.` { if checker.json[checker.checker_idx] == `.` {
checker.checker_idx++ checker.increment('expected digit')!
if checker.checker_idx == checker.json.len { if !(checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9`) {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
if checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0`
&& checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
}
} else {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}') return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
} }
for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
checker.increment('') or { return }
}
} }
// exponent part // exponent part
if checker.checker_idx != checker.json.len if checker.json[checker.checker_idx] == `e` || checker.json[checker.checker_idx] == `E` {
&& (checker.json[checker.checker_idx] == `e` || checker.json[checker.checker_idx] == `E`) { checker.increment('expected digit')!
checker.checker_idx++
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
if checker.json[checker.checker_idx] == `-` || checker.json[checker.checker_idx] == `+` { if checker.json[checker.checker_idx] == `-` || checker.json[checker.checker_idx] == `+` {
checker.checker_idx++ checker.increment('expected digit')!
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
} }
if checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` { if !(checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9`) {
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0`
&& checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
}
} else {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}') return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
} }
for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
checker.increment('') or { return }
}
} }
checker.checker_idx-- checker.checker_idx--
@ -284,160 +256,58 @@ fn (mut checker Decoder) check_null() ! {
} }
fn (mut checker Decoder) check_array() ! { fn (mut checker Decoder) check_array() ! {
// check if the JSON string is an empty array checker.increment('expected array end')!
if checker.json.len >= checker.checker_idx + 2 {
checker.checker_idx++ checker.skip_whitespace('expected array end')!
} else {
return checker.checker_error('EOF error: There are not enough length for an array')
}
for checker.json[checker.checker_idx] != `]` { for checker.json[checker.checker_idx] != `]` {
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
checker.checker_idx--
break
}
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `]` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: array not closed')
}
checker.check_json_format()! checker.check_json_format()!
// whitespace checker.skip_whitespace('expected array end')!
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `]` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
if checker.json[checker.checker_idx] == `,` { if checker.json[checker.checker_idx] == `,` {
checker.checker_idx++ checker.increment('expected array value')!
for checker.json[checker.checker_idx] in whitespace_chars { checker.skip_whitespace('') or {}
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `]` { if checker.json[checker.checker_idx] == `]` {
return checker.checker_error('Cannot use `,`, before `]`') return checker.checker_error('Cannot use `,`, before `]`')
} }
continue
} else {
if checker.json[checker.checker_idx] == `]` {
break
} else {
return checker.checker_error('`]` after value')
}
} }
} }
} }
fn (mut checker Decoder) check_object() ! { fn (mut checker Decoder) check_object() ! {
if checker.json.len - checker.checker_idx < 2 { checker.increment('expected object end')!
return checker.checker_error('EOF error: expecting a complete object after `{`')
} checker.skip_whitespace('expected object end')!
checker.checker_idx++
for checker.json[checker.checker_idx] != `}` { for checker.json[checker.checker_idx] != `}` {
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
checker.checker_idx--
break
}
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `}` {
continue
}
if checker.json[checker.checker_idx] != `"` { if checker.json[checker.checker_idx] != `"` {
return checker.checker_error('Expecting object key') checker.checker_error('Expecting object key')!
} }
// Object key
checker.check_json_format()! checker.check_json_format()!
for checker.json[checker.checker_idx] != `:` { checker.skip_whitespace('expected `:`')!
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: key colon not found')
}
if checker.json[checker.checker_idx] !in whitespace_chars {
return checker.checker_error('invalid value after object key')
}
checker.checker_idx++
}
if checker.json[checker.checker_idx] != `:` { if checker.json[checker.checker_idx] != `:` {
return checker.checker_error('Expecting `:` after object key') checker.checker_error('expected `:`, got `${checker.json[checker.checker_idx].ascii_str()}`')!
} }
// skip `:` checker.increment('expected object value')!
checker.checker_idx++
// skip whitespace checker.skip_whitespace('expected object value')!
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
match checker.json[checker.checker_idx] {
`"`, `[`, `{`, `0`...`9`, `-`, `n`, `t`, `f` {
checker.check_json_format()! checker.check_json_format()!
if checker.checker_idx == checker.json.len { checker.skip_whitespace('expected object end')!
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
// whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `}` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
if checker.json[checker.checker_idx] == `,` { if checker.json[checker.checker_idx] == `,` {
checker.checker_idx++ checker.increment('expected object key')!
checker.skip_whitespace('') or {}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: Expecting object key after `,`')
}
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] != `"` {
return checker.checker_error('Expecting object key after `,`')
}
} else {
if checker.json[checker.checker_idx] == `}` { if checker.json[checker.checker_idx] == `}` {
break return checker.checker_error('Cannot use `,`, before `}`')
} else {
return checker.checker_error('invalid object value')
}
}
}
else {
return checker.checker_error('invalid object value')
} }
} }
} }

View file

@ -126,7 +126,7 @@ fn (list &LinkedList[T]) free() {
enum ValueKind { enum ValueKind {
array array
object object
string_ string
number number
boolean boolean
null null
@ -316,7 +316,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
$if val is StringDecoder { $if val is StringDecoder {
struct_info := decoder.current_node.value struct_info := decoder.current_node.value
if struct_info.value_kind == .string_ { if struct_info.value_kind == .string {
val.from_json_string(decoder.json[struct_info.position + 1..struct_info.position + val.from_json_string(decoder.json[struct_info.position + 1..struct_info.position +
struct_info.length - 1]) or { struct_info.length - 1]) or {
decoder.decode_error('${typeof(*val).name}: ${err.msg()}')! decoder.decode_error('${typeof(*val).name}: ${err.msg()}')!
@ -370,7 +370,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
$if T.unaliased_typ is string { $if T.unaliased_typ is string {
string_info := decoder.current_node.value string_info := decoder.current_node.value
if string_info.value_kind == .string_ { if string_info.value_kind == .string {
mut string_buffer := []u8{cap: string_info.length} // might be too long but most json strings don't contain many escape characters anyways mut string_buffer := []u8{cap: string_info.length} // might be too long but most json strings don't contain many escape characters anyways
mut buffer_index := 1 mut buffer_index := 1
@ -542,7 +542,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
current_field_info = current_field_info.next current_field_info = current_field_info.next
continue continue
} }
.string_ { .string {
if decoder.current_node.next.value.length == 2 { if decoder.current_node.next.value.length == 2 {
current_field_info = current_field_info.next current_field_info = current_field_info.next
continue continue
@ -705,7 +705,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
if value_info.value_kind == .number { if value_info.value_kind == .number {
unsafe { decoder.decode_number(&val)! } unsafe { decoder.decode_number(&val)! }
} else if value_info.value_kind == .string_ { } else if value_info.value_kind == .string {
// recheck if string contains number // recheck if string contains number
decoder.checker_idx = value_info.position + 1 decoder.checker_idx = value_info.position + 1
decoder.check_number()! decoder.check_number()!

View file

@ -42,7 +42,7 @@ fn (mut decoder Decoder) check_element_type_valid[T](element T, current_node &No
} }
match current_node.value.value_kind { match current_node.value.value_kind {
.string_ { .string {
$if element is string { $if element is string {
return true return true
} $else $if element is time.Time { } $else $if element is time.Time {
@ -220,7 +220,7 @@ fn (mut decoder Decoder) init_sumtype_by_value_kind[T](mut val T, value_info Val
mut failed_struct := false mut failed_struct := false
match value_info.value_kind { match value_info.value_kind {
.string_ { .string {
$for v in val.variants { $for v in val.variants {
$if v.typ is string { $if v.typ is string {
val = T(v) val = T(v)

View file

@ -20,7 +20,7 @@ fn test_check_if_json_match() {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 1 assert err.character == 1
assert err.message == 'Data: Expected object, but got string_' assert err.message == 'Data: Expected object, but got string'
} }
has_error = true has_error = true
} }
@ -115,20 +115,60 @@ fn test_check_json_format() {
}, },
{ {
'json': '{"key": 123' 'json': '{"key": 123'
'error': 'Syntax: EOF error: braces are not closed' 'error': 'Syntax: Expecting object key' // improve message
}, },
{ {
'json': '{"key": 123,' 'json': '{"key": 123,'
'error': 'Syntax: EOF error: Expecting object key after `,`' 'error': 'Syntax: EOF: expected object key'
}, },
{ {
'json': '{"key": 123, "key2": 456,}' 'json': '{"key": 123, "key2": 456,}'
'error': 'Syntax: Expecting object key after `,`' 'error': 'Syntax: Cannot use `,`, before `}`'
}, },
{ {
'json': '[[1, 2, 3], [4, 5, 6],]' 'json': '[[1, 2, 3], [4, 5, 6],]'
'error': 'Syntax: Cannot use `,`, before `]`' 'error': 'Syntax: Cannot use `,`, before `]`'
}, },
{
'json': ' '
'error': 'Syntax: EOF: empty json'
},
{
'json': '"'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"not closed'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"\\"'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"\\u8"'
'error': 'Syntax: short unicode escape sequence \\u8'
},
{
'json': '['
'error': 'Syntax: EOF: expected array end'
},
{
'json': '[ '
'error': 'Syntax: EOF: expected array end'
},
{
'json': '{'
'error': 'Syntax: EOF: expected object end'
},
{
'json': '{ '
'error': 'Syntax: EOF: expected object end'
},
{
'json': '{"key": "value" '
'error': 'Syntax: EOF: expected object end'
},
] ]
for json_and_error in json_and_error_message { for json_and_error in json_and_error_message {

View file

@ -36,7 +36,7 @@ fn test_json_string_invalid_escapes() {
json.decode[string](r'"\x"') or { json.decode[string](r'"\x"') or {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 2 assert err.character == 3
assert err.message == 'Syntax: unknown escape sequence' assert err.message == 'Syntax: unknown escape sequence'
} }
has_error = true has_error = true
@ -48,7 +48,7 @@ fn test_json_string_invalid_escapes() {
json.decode[string](r'"\u123"') or { json.decode[string](r'"\u123"') or {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 2 assert err.character == 3
assert err.message == 'Syntax: short unicode escape sequence \\u123' assert err.message == 'Syntax: short unicode escape sequence \\u123'
} }
has_error = true has_error = true

View file

@ -60,7 +60,7 @@ fn test_raw_decode_map_invalid() {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 8 assert err.character == 8
assert err.message == 'Syntax: invalid value after object key' assert err.message == 'Syntax: expected `:`, got `,`'
} }
return return

View file

@ -58,7 +58,7 @@ fn test_decode_error_message_should_have_enough_context_just_brace() {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 1 assert err.character == 1
assert err.message == 'Syntax: EOF error: expecting a complete object after `{`' assert err.message == 'Syntax: EOF: expected object end'
} }
return return
} }
@ -76,7 +76,7 @@ fn test_decode_error_message_should_have_enough_context_trailing_comma_at_end()
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 5 assert err.line == 5
assert err.character == 1 assert err.character == 1
assert err.message == 'Syntax: Expecting object key after `,`' assert err.message == 'Syntax: Cannot use `,`, before `}`'
} }
return return
@ -90,7 +90,7 @@ fn test_decode_error_message_should_have_enough_context_in_the_middle() {
if err is json.JsonDecodeError { if err is json.JsonDecodeError {
assert err.line == 1 assert err.line == 1
assert err.character == 40 assert err.character == 40
assert err.message == 'Syntax: invalid value. Unexpected character after string_ end' assert err.message == 'Syntax: invalid value. Unexpected character after string end'
} }
return return
} }

View file

@ -44,7 +44,7 @@ enum ValueKind {
unknown unknown
array array
object object
string_ string
number number
boolean boolean
null null
@ -56,7 +56,7 @@ fn (k ValueKind) str() string {
.unknown { 'unknown' } .unknown { 'unknown' }
.array { 'array' } .array { 'array' }
.object { 'object' } .object { 'object' }
.string_ { 'string' } .string { 'string' }
.number { 'number' } .number { 'number' }
.boolean { 'boolean' } .boolean { 'boolean' }
.null { 'null' } .null { 'null' }