decoder2: improve checker with better EOF detection (#25075)
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Shy and PV CI / v-compiles-puzzle-vibes (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled
json encoder benchmark CI / json-encode-benchmark (push) Has been cancelled

This commit is contained in:
Larsimusrex 2025-08-09 15:38:58 +02:00 committed by GitHub
parent 9140c9f844
commit a11de7263f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 140 additions and 230 deletions

View file

@ -1,14 +1,28 @@
module decoder2
// increment moves the checker cursor (`checker_idx`) forward by one byte.
// Before moving it checks whether the cursor already sits on the last byte
// of `checker.json` (`checker_idx + 1 == json.len`); in that case there is
// nothing left to advance onto and an error is produced instead:
//   - `message == ''`: a bare `Error{}` is returned, which callers in this
//     file consume with `or { return }` to treat end of input as a normal stop.
//   - otherwise: the failure is reported through `checker_error` with the
//     text `'EOF: ' + message`.
@[inline]
fn (mut checker Decoder) increment(message string) ! {
	// Common case: at least one more byte follows, just step forward.
	if checker.checker_idx + 1 != checker.json.len {
		checker.checker_idx++
		return
	}
	// Cursor is on the final byte: advancing would run off the input.
	if message == '' {
		return Error{}
	}
	checker.checker_error('EOF: ' + message)!
	// NOTE(review): checker_error is defined elsewhere and presumably always
	// returns an error; the increment is kept so the cursor still advances,
	// as it did before, should that call ever return normally.
	checker.checker_idx++
}
// skip_whitespace advances the checker cursor until the byte under it is no
// longer one of `whitespace_chars`. Every step goes through `increment`, so
// reaching the end of the input while still on whitespace yields the EOF
// error described by `message` (or a bare error when `message` is empty).
// The byte at `checker_idx` is read without a separate bounds check, so the
// cursor must be inside `checker.json` on entry.
@[inline]
fn (mut checker Decoder) skip_whitespace(message string) ! {
	for {
		// Stop as soon as the current byte is meaningful (non-whitespace).
		if checker.json[checker.checker_idx] !in whitespace_chars {
			return
		}
		checker.increment(message)!
	}
}
// check_json_format checks if the JSON string is valid and updates the decoder state.
fn (mut checker Decoder) check_json_format() ! {
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
break
}
checker.checker_idx++
}
checker.skip_whitespace('empty json')!
start_idx_position := checker.checker_idx
@ -18,7 +32,7 @@ fn (mut checker Decoder) check_json_format() ! {
`"` {
checker.values_info.push(ValueInfo{
position: checker.checker_idx
value_kind: .string_
value_kind: .string
})
actual_value_info_pointer = checker.values_info.last()
@ -82,54 +96,36 @@ fn (mut checker Decoder) check_json_format() ! {
actual_value_info_pointer.length = checker.checker_idx + 1 - start_idx_position
if checker.checker_idx < checker.json.len {
checker.checker_idx++
}
checker.increment('') or { return }
checker.skip_whitespace('') or { return }
for checker.checker_idx < checker.json.len
&& checker.json[checker.checker_idx] !in [`,`, `:`, `}`, `]`] {
// get trash characters after the value
if checker.json[checker.checker_idx] !in whitespace_chars {
if checker.json[checker.checker_idx] !in [`,`, `:`, `}`, `]`] {
checker.checker_error('invalid value. Unexpected character after ${actual_value_info_pointer.value_kind} end')!
} else {
// whitespace
}
checker.checker_idx++
}
}
fn (mut checker Decoder) check_string() ! {
// check if the JSON string is a valid string
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: string not closed')
}
checker.checker_idx++
checker.increment('string not closed')!
// check if the JSON string is a valid escape sequence
for checker.json[checker.checker_idx] != `"` {
if checker.json[checker.checker_idx] == `\\` {
if checker.checker_idx + 1 >= checker.json.len - 1 {
return checker.checker_error('invalid escape sequence')
}
escaped_char := checker.json[checker.checker_idx + 1]
checker.increment('invalid escape sequence')!
escaped_char := checker.json[checker.checker_idx]
match escaped_char {
`/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` {
checker.checker_idx++ // make sure escaped quotation marks are skipped
}
`/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` {}
`u` {
// check if the JSON string is a valid unicode escape sequence
escaped_char_last_index := checker.checker_idx + 5
escaped_char_last_index := checker.checker_idx + 4
if escaped_char_last_index < checker.json.len {
// 2 bytes for the unicode escape sequence `\u`
checker.checker_idx += 2
checker.increment('invalid escape sequence')!
for checker.checker_idx < escaped_char_last_index {
match checker.json[checker.checker_idx] {
`0`...`9`, `a`...`f`, `A`...`F` {
checker.checker_idx++
checker.increment('invalid unicode escape sequence')!
}
else {
return checker.checker_error('invalid unicode escape sequence')
@ -138,7 +134,7 @@ fn (mut checker Decoder) check_string() ! {
}
continue
} else {
return checker.checker_error('short unicode escape sequence ${checker.json[checker.checker_idx..escaped_char_last_index]}')
return checker.checker_error('short unicode escape sequence ${checker.json[checker.checker_idx - 1..checker.json.len - 1]}')
}
}
else {
@ -146,81 +142,57 @@ fn (mut checker Decoder) check_string() ! {
}
}
}
checker.checker_idx++
checker.increment('string not closed')!
}
}
fn (mut checker Decoder) check_number() ! {
// check if the JSON string is a valid float or integer
if checker.json[checker.checker_idx] == `-` {
checker.checker_idx++
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
checker.increment('expected digit')!
}
// integer part
if checker.json[checker.checker_idx] == `0` {
checker.checker_idx++
checker.increment('') or { return }
} else if checker.json[checker.checker_idx] >= `1` && checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
checker.increment('') or { return }
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0`
&& checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
checker.increment('') or { return }
}
} else {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
}
// fraction part
if checker.checker_idx != checker.json.len && checker.json[checker.checker_idx] == `.` {
checker.checker_idx++
if checker.json[checker.checker_idx] == `.` {
checker.increment('expected digit')!
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
if checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0`
&& checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
}
} else {
if !(checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9`) {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
}
for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
checker.increment('') or { return }
}
}
// exponent part
if checker.checker_idx != checker.json.len
&& (checker.json[checker.checker_idx] == `e` || checker.json[checker.checker_idx] == `E`) {
checker.checker_idx++
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
if checker.json[checker.checker_idx] == `e` || checker.json[checker.checker_idx] == `E` {
checker.increment('expected digit')!
if checker.json[checker.checker_idx] == `-` || checker.json[checker.checker_idx] == `+` {
checker.checker_idx++
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('expected digit got EOF')
}
checker.increment('expected digit')!
}
if checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
for checker.checker_idx < checker.json.len && checker.json[checker.checker_idx] >= `0`
&& checker.json[checker.checker_idx] <= `9` {
checker.checker_idx++
}
} else {
if !(checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9`) {
return checker.checker_error('expected digit got ${checker.json[checker.checker_idx].ascii_str()}')
}
for checker.json[checker.checker_idx] >= `0` && checker.json[checker.checker_idx] <= `9` {
checker.increment('') or { return }
}
}
checker.checker_idx--
@ -284,160 +256,58 @@ fn (mut checker Decoder) check_null() ! {
}
fn (mut checker Decoder) check_array() ! {
// check if the JSON string is an empty array
if checker.json.len >= checker.checker_idx + 2 {
checker.checker_idx++
} else {
return checker.checker_error('EOF error: There are not enough length for an array')
}
checker.increment('expected array end')!
checker.skip_whitespace('expected array end')!
for checker.json[checker.checker_idx] != `]` {
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
checker.checker_idx--
break
}
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `]` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: array not closed')
}
checker.check_json_format()!
// whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `]` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
checker.skip_whitespace('expected array end')!
if checker.json[checker.checker_idx] == `,` {
checker.checker_idx++
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
checker.increment('expected array value')!
checker.skip_whitespace('') or {}
if checker.json[checker.checker_idx] == `]` {
return checker.checker_error('Cannot use `,`, before `]`')
}
continue
} else {
if checker.json[checker.checker_idx] == `]` {
break
} else {
return checker.checker_error('`]` after value')
}
}
}
}
fn (mut checker Decoder) check_object() ! {
if checker.json.len - checker.checker_idx < 2 {
return checker.checker_error('EOF error: expecting a complete object after `{`')
}
checker.checker_idx++
checker.increment('expected object end')!
checker.skip_whitespace('expected object end')!
for checker.json[checker.checker_idx] != `}` {
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
if checker.checker_idx == checker.json.len {
checker.checker_idx--
break
}
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `}` {
continue
}
if checker.json[checker.checker_idx] != `"` {
return checker.checker_error('Expecting object key')
checker.checker_error('Expecting object key')!
}
// Object key
checker.check_json_format()!
for checker.json[checker.checker_idx] != `:` {
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: key colon not found')
}
if checker.json[checker.checker_idx] !in whitespace_chars {
return checker.checker_error('invalid value after object key')
}
checker.checker_idx++
}
checker.skip_whitespace('expected `:`')!
if checker.json[checker.checker_idx] != `:` {
return checker.checker_error('Expecting `:` after object key')
checker.checker_error('expected `:`, got `${checker.json[checker.checker_idx].ascii_str()}`')!
}
// skip `:`
checker.checker_idx++
checker.increment('expected object value')!
// skip whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
checker.skip_whitespace('expected object value')!
match checker.json[checker.checker_idx] {
`"`, `[`, `{`, `0`...`9`, `-`, `n`, `t`, `f` {
checker.check_json_format()!
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
// whitespace
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] == `}` {
break
}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: braces are not closed')
}
checker.skip_whitespace('expected object end')!
if checker.json[checker.checker_idx] == `,` {
checker.checker_idx++
checker.increment('expected object key')!
checker.skip_whitespace('') or {}
if checker.checker_idx == checker.json.len {
checker.checker_idx--
return checker.checker_error('EOF error: Expecting object key after `,`')
}
for checker.json[checker.checker_idx] in whitespace_chars {
checker.checker_idx++
}
if checker.json[checker.checker_idx] != `"` {
return checker.checker_error('Expecting object key after `,`')
}
} else {
if checker.json[checker.checker_idx] == `}` {
break
} else {
return checker.checker_error('invalid object value')
}
}
}
else {
return checker.checker_error('invalid object value')
return checker.checker_error('Cannot use `,`, before `}`')
}
}
}

View file

@ -126,7 +126,7 @@ fn (list &LinkedList[T]) free() {
enum ValueKind {
array
object
string_
string
number
boolean
null
@ -316,7 +316,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
$if val is StringDecoder {
struct_info := decoder.current_node.value
if struct_info.value_kind == .string_ {
if struct_info.value_kind == .string {
val.from_json_string(decoder.json[struct_info.position + 1..struct_info.position +
struct_info.length - 1]) or {
decoder.decode_error('${typeof(*val).name}: ${err.msg()}')!
@ -370,7 +370,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
$if T.unaliased_typ is string {
string_info := decoder.current_node.value
if string_info.value_kind == .string_ {
if string_info.value_kind == .string {
mut string_buffer := []u8{cap: string_info.length} // might be too long but most json strings don't contain many escape characters anyways
mut buffer_index := 1
@ -542,7 +542,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
current_field_info = current_field_info.next
continue
}
.string_ {
.string {
if decoder.current_node.next.value.length == 2 {
current_field_info = current_field_info.next
continue
@ -705,7 +705,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
if value_info.value_kind == .number {
unsafe { decoder.decode_number(&val)! }
} else if value_info.value_kind == .string_ {
} else if value_info.value_kind == .string {
// recheck if string contains number
decoder.checker_idx = value_info.position + 1
decoder.check_number()!

View file

@ -42,7 +42,7 @@ fn (mut decoder Decoder) check_element_type_valid[T](element T, current_node &No
}
match current_node.value.value_kind {
.string_ {
.string {
$if element is string {
return true
} $else $if element is time.Time {
@ -220,7 +220,7 @@ fn (mut decoder Decoder) init_sumtype_by_value_kind[T](mut val T, value_info Val
mut failed_struct := false
match value_info.value_kind {
.string_ {
.string {
$for v in val.variants {
$if v.typ is string {
val = T(v)

View file

@ -20,7 +20,7 @@ fn test_check_if_json_match() {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 1
assert err.message == 'Data: Expected object, but got string_'
assert err.message == 'Data: Expected object, but got string'
}
has_error = true
}
@ -115,20 +115,60 @@ fn test_check_json_format() {
},
{
'json': '{"key": 123'
'error': 'Syntax: EOF error: braces are not closed'
'error': 'Syntax: Expecting object key' // improve message
},
{
'json': '{"key": 123,'
'error': 'Syntax: EOF error: Expecting object key after `,`'
'error': 'Syntax: EOF: expected object key'
},
{
'json': '{"key": 123, "key2": 456,}'
'error': 'Syntax: Expecting object key after `,`'
'error': 'Syntax: Cannot use `,`, before `}`'
},
{
'json': '[[1, 2, 3], [4, 5, 6],]'
'error': 'Syntax: Cannot use `,`, before `]`'
},
{
'json': ' '
'error': 'Syntax: EOF: empty json'
},
{
'json': '"'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"not closed'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"\\"'
'error': 'Syntax: EOF: string not closed'
},
{
'json': '"\\u8"'
'error': 'Syntax: short unicode escape sequence \\u8'
},
{
'json': '['
'error': 'Syntax: EOF: expected array end'
},
{
'json': '[ '
'error': 'Syntax: EOF: expected array end'
},
{
'json': '{'
'error': 'Syntax: EOF: expected object end'
},
{
'json': '{ '
'error': 'Syntax: EOF: expected object end'
},
{
'json': '{"key": "value" '
'error': 'Syntax: EOF: expected object end'
},
]
for json_and_error in json_and_error_message {

View file

@ -36,7 +36,7 @@ fn test_json_string_invalid_escapes() {
json.decode[string](r'"\x"') or {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 2
assert err.character == 3
assert err.message == 'Syntax: unknown escape sequence'
}
has_error = true
@ -48,7 +48,7 @@ fn test_json_string_invalid_escapes() {
json.decode[string](r'"\u123"') or {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 2
assert err.character == 3
assert err.message == 'Syntax: short unicode escape sequence \\u123'
}
has_error = true

View file

@ -60,7 +60,7 @@ fn test_raw_decode_map_invalid() {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 8
assert err.message == 'Syntax: invalid value after object key'
assert err.message == 'Syntax: expected `:`, got `,`'
}
return

View file

@ -58,7 +58,7 @@ fn test_decode_error_message_should_have_enough_context_just_brace() {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 1
assert err.message == 'Syntax: EOF error: expecting a complete object after `{`'
assert err.message == 'Syntax: EOF: expected object end'
}
return
}
@ -76,7 +76,7 @@ fn test_decode_error_message_should_have_enough_context_trailing_comma_at_end()
if err is json.JsonDecodeError {
assert err.line == 5
assert err.character == 1
assert err.message == 'Syntax: Expecting object key after `,`'
assert err.message == 'Syntax: Cannot use `,`, before `}`'
}
return
@ -90,7 +90,7 @@ fn test_decode_error_message_should_have_enough_context_in_the_middle() {
if err is json.JsonDecodeError {
assert err.line == 1
assert err.character == 40
assert err.message == 'Syntax: invalid value. Unexpected character after string_ end'
assert err.message == 'Syntax: invalid value. Unexpected character after string end'
}
return
}

View file

@ -44,7 +44,7 @@ enum ValueKind {
unknown
array
object
string_
string
number
boolean
null
@ -56,7 +56,7 @@ fn (k ValueKind) str() string {
.unknown { 'unknown' }
.array { 'array' }
.object { 'object' }
.string_ { 'string' }
.string { 'string' }
.number { 'number' }
.boolean { 'boolean' }
.null { 'null' }