Compare commits

...

2 commits

Author SHA1 Message Date
Larsimusrex
24f91280d9
decoder2: add support for decoding utf-16 surrogates, produced by some JSON encoder implementations (Python, Java, C#) (#25193)
Some checks failed
Graphics CI / gg-regressions (push) Waiting to run
vlib modules CI / build-module-docs (push) Waiting to run
Shy and PV CI / v-compiles-puzzle-vibes (push) Waiting to run
Sanitized CI / sanitize-address-msvc (push) Waiting to run
Sanitized CI / tests-sanitize-address-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-clang (push) Waiting to run
Sanitized CI / sanitize-undefined-gcc (push) Waiting to run
Sanitized CI / sanitize-address-gcc (push) Waiting to run
Sanitized CI / sanitize-memory-clang (push) Waiting to run
sdl CI / v-compiles-sdl-examples (push) Waiting to run
Time CI / time-linux (push) Waiting to run
Time CI / time-macos (push) Waiting to run
Time CI / time-windows (push) Waiting to run
toml CI / toml-module-pass-external-test-suites (push) Waiting to run
Tools CI / tools-linux (clang) (push) Waiting to run
Tools CI / tools-linux (gcc) (push) Waiting to run
Tools CI / tools-linux (tcc) (push) Waiting to run
Tools CI / tools-macos (clang) (push) Waiting to run
Tools CI / tools-windows (gcc) (push) Waiting to run
Tools CI / tools-windows (msvc) (push) Waiting to run
Tools CI / tools-windows (tcc) (push) Waiting to run
Tools CI / tools-docker-ubuntu-musl (push) Waiting to run
vab CI / vab-compiles-v-examples (push) Waiting to run
vab CI / v-compiles-os-android (push) Waiting to run
json decoder benchmark CI / json-encode-benchmark (push) Has been cancelled
2025-08-30 13:02:39 +03:00
Delyan Angelov
ae8134705b
veb: fix a spurious error with Chromium, for POST requests, split into 2 parts, 1st with only http headers, without body (fix #25191) (#25195) 2025-08-30 12:56:21 +03:00
4 changed files with 212 additions and 39 deletions

View file

@ -0,0 +1,95 @@
import log
import veb
import time
import net
import x.json2
import net.http
// port is the TCP port that the test veb server is started on (see test_veb_app_start),
// and that test_make_request dials on 127.0.0.1.
const port = 31228
// Context is the request context type used by the test app's handlers.
// It only embeds veb.Context and adds no fields of its own.
pub struct Context {
veb.Context
}
// App is the veb application under test.
pub struct App {
pub mut:
// started receives `true` from before_accept_loop; test_veb_app_start blocks
// on it, before letting the other tests talk to the server.
started chan bool
}
// before_accept_loop signals on app.started, which unblocks test_veb_app_start.
// NOTE(review): presumably veb invokes this hook right before it starts accepting
// connections (the name matches veb's BeforeAcceptApp interface) - confirm against veb.
pub fn (mut app App) before_accept_loop() {
app.started <- true
}
// data handles `POST /data/:filename`. It echoes the upload back as JSON, in the
// form of a http.FileData, that holds the filename from the route, the value of the
// request's Content-Type header, and the raw request body.
// When the request has no Content-Type header, it responds with ctx.no_content().
@['/data/:filename'; post]
fn (mut app App) data(mut ctx Context, filename string) veb.Result {
	mime := ctx.get_header(http.CommonHeader.content_type) or { return ctx.no_content() }
	body := ctx.req.data
	log.info('Received ${filename} with content_type ${mime} and length ${body.len}')
	return ctx.json(http.FileData{
		filename:     filename
		content_type: mime
		data:         body
	})
}
// svg_image_content is the request body that test_make_request sends in a separate
// write after the headers; the test also expects to get exactly the same content back,
// in the `data` field of the JSON response.
const svg_image_content = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="-8 -308 316 316" width="316" height="316"><g fill-opacity="0" stroke="#000" xmlns="http://www.w3.org/2000/svg"><path xmlns="http://www.w3.org/2000/svg" d="M0 0 l -1.8369701987210297e-14 -100 m -1.8369701987210297e-14 -100 l -1.8369701987210297e-14 -100 l 100 -2.4492935982947064e-14 m 100 -2.4492935982947064e-14 l 100 -2.4492935982947064e-14 l 3.061616997868383e-14 100 m 3.061616997868383e-14 100 l 3.061616997868383e-14 100 l -100 3.6739403974420595e-14 m -100 3.6739403974420595e-14 l -100 3.6739403974420595e-14" stroke="#000000" stroke-width="5"></path></g></svg>'
// test_veb_app_start starts the veb server in a background thread, and waits until
// the app signals on app.started, so that the following tests can connect to it.
fn test_veb_app_start() {
log.info('starting watchdog ...')
// The watchdog terminates the whole test process 10 seconds after it is started,
// so a stuck server or client can not keep the test running forever.
// NOTE(review): it exits with code 0, so a test that hangs would presumably be
// reported as passing - confirm that this is intended.
spawn fn () {
log.info('watchdog running')
time.sleep(10 * time.second)
log.info('exiting...')
exit(0)
}()
mut app := &App{}
// run the server in its own thread, since the test has to continue
spawn veb.run_at[App, Context](mut app, port: port)
// block until before_accept_loop sends on the channel
_ := <-app.started
log.info('app started')
}
// test_make_request checks that veb handles a POST request which arrives in 2 separate
// parts: the 1st contains only the http headers, the 2nd (sent a bit later) contains
// the body. The server is expected to respond with 200 OK, and with a JSON encoded
// http.FileData, that echoes the file name, the content type and the body that was sent.
fn test_make_request() {
	mut client := net.dial_tcp('127.0.0.1:${port}')!
	defer { client.close() or {} }
	// 1st part: only the request line and the headers, terminated by an empty line.
	// Host and Content-Length are derived from `port` and `svg_image_content`, so that
	// the request stays consistent, when any of those constants is changed.
	// Note: each line of the literal ends with `\r`, followed by the real newline in the
	// source, so what is sent is separated by \r\n. The continuation lines must stay
	// unindented, otherwise the indentation would become part of the request.
	client.write_string('POST /data/Seeker.svg HTTP/1.1\r
Host: localhost:${port}\r
User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0\r
Accept: */*\r
Accept-Language: en-US,en;q=0.5\r
Accept-Encoding: gzip, deflate, br, zstd\r
Content-Type: image/svg+xml\r
Content-Length: ${svg_image_content.len}\r
Origin: null\r
Connection: close\r
Sec-Fetch-Dest: empty\r
Sec-Fetch-Mode: cors\r
Sec-Fetch-Site: cross-site\r
Priority: u=4\r
\r
')! // "
	// wait a bit, so that the body is sent separately, after the headers
	time.sleep(25 * time.millisecond)
	// 2nd part: the body
	client.write_string(svg_image_content)!
	// read the whole response; the loop ends on a read error, or on a 0 length read
	mut res := []u8{}
	mut buf := []u8{len: 512}
	for {
		read_len := client.read(mut buf) or { break }
		if read_len == 0 {
			break
		}
		res << buf[0..read_len]
	}
	response := res.bytestr()
	assert response.starts_with('HTTP/1.1 200 OK')
	// 706 should be the length of the JSON encoded FileData for the current
	// svg_image_content; it has to be updated, if that constant changes.
	assert response.contains('Content-Length: 706')
	assert response.contains('Content-Type: application/json')
	// everything after the first empty line is the JSON payload
	payload := response.all_after('\r\n\r\n')
	r := json2.decode[http.FileData](payload)!
	dump(r.filename)
	dump(r.content_type)
	assert r.filename == 'Seeker.svg'
	assert r.content_type == 'image/svg+xml'
	assert r.data.starts_with('<svg xmlns=')
	assert r.data.ends_with('</svg>')
	assert r.data == svg_image_content
}

View file

@ -134,6 +134,9 @@ mut:
pub fn (mut params RequestParams) request_done(fd int) { pub fn (mut params RequestParams) request_done(fd int) {
params.incomplete_requests[fd] = http.Request{} params.incomplete_requests[fd] = http.Request{}
params.idx[fd] = 0 params.idx[fd] = 0
$if trace_handle_read ? {
eprintln('>>>>> fd: ${fd} | request_done.')
}
} }
interface BeforeAcceptApp { interface BeforeAcceptApp {
@ -240,10 +243,8 @@ fn handle_timeout(mut pv picoev.Picoev, mut params RequestParams, fd int) {
handle: fd handle: fd
is_blocking: false is_blocking: false
} }
fast_send_resp(mut conn, http_408) or {} fast_send_resp(mut conn, http_408) or {}
pv.close_conn(fd) pv.close_conn(fd)
params.request_done(fd) params.request_done(fd)
} }
@ -263,19 +264,16 @@ fn handle_write_file(mut pv picoev.Picoev, mut params RequestParams, fd int) {
if bytes_to_write > max_write { if bytes_to_write > max_write {
bytes_to_write = max_write bytes_to_write = max_write
} }
data := unsafe { malloc(bytes_to_write) } data := unsafe { malloc(bytes_to_write) }
defer { defer {
unsafe { free(data) } unsafe { free(data) }
} }
mut conn := &net.TcpConn{ mut conn := &net.TcpConn{
sock: net.tcp_socket_from_handle_raw(fd) sock: net.tcp_socket_from_handle_raw(fd)
handle: fd handle: fd
is_blocking: false is_blocking: false
write_timeout: params.timeout_in_seconds * time.second write_timeout: params.timeout_in_seconds * time.second
} }
params.file_responses[fd].file.read_into_ptr(data, bytes_to_write) or { params.file_responses[fd].file.read_into_ptr(data, bytes_to_write) or {
params.file_responses[fd].done() params.file_responses[fd].done()
pv.close_conn(fd) pv.close_conn(fd)
@ -301,17 +299,14 @@ fn handle_write_file(mut pv picoev.Picoev, mut params RequestParams, fd int) {
@[direct_array_access] @[direct_array_access]
fn handle_write_string(mut pv picoev.Picoev, mut params RequestParams, fd int) { fn handle_write_string(mut pv picoev.Picoev, mut params RequestParams, fd int) {
mut bytes_to_write := int(params.string_responses[fd].str.len - params.string_responses[fd].pos) mut bytes_to_write := int(params.string_responses[fd].str.len - params.string_responses[fd].pos)
if bytes_to_write > max_write { if bytes_to_write > max_write {
bytes_to_write = max_write bytes_to_write = max_write
} }
mut conn := &net.TcpConn{ mut conn := &net.TcpConn{
sock: net.tcp_socket_from_handle_raw(fd) sock: net.tcp_socket_from_handle_raw(fd)
handle: fd handle: fd
is_blocking: false is_blocking: false
} }
// pointer magic to start at the correct position in the buffer // pointer magic to start at the correct position in the buffer
data := unsafe { params.string_responses[fd].str.str + params.string_responses[fd].pos } data := unsafe { params.string_responses[fd].str.str + params.string_responses[fd].pos }
actual_written := send_string_ptr(mut conn, data, bytes_to_write) or { actual_written := send_string_ptr(mut conn, data, bytes_to_write) or {
@ -342,7 +337,6 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
handle: fd handle: fd
is_blocking: false is_blocking: false
} }
// cap the max_read to 8KB // cap the max_read to 8KB
mut reader := io.new_buffered_reader(reader: conn, cap: max_read) mut reader := io.new_buffered_reader(reader: conn, cap: max_read)
defer { defer {
@ -350,12 +344,14 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
reader.free() reader.free()
} }
} }
// take the previous incomplete request // take the previous incomplete request
mut req := params.incomplete_requests[fd] mut req := params.incomplete_requests[fd]
// check if there is an incomplete request for this file descriptor // check if there is an incomplete request for this file descriptor
if params.idx[fd] == 0 { if params.idx[fd] == 0 {
$if trace_handle_read ? {
eprintln('>>>>> fd: ${fd} | start of request parsing')
}
// this is the start of a new request, setup the connection, and read the headers:
// set the read and write timeout according to picoev settings when the // set the read and write timeout according to picoev settings when the
// connection is first encountered // connection is first encountered
conn.set_read_timeout(params.timeout_in_seconds) conn.set_read_timeout(params.timeout_in_seconds)
@ -370,7 +366,7 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
// the buffered reader was empty meaning that the client probably // the buffered reader was empty meaning that the client probably
// closed the connection. // closed the connection.
pv.close_conn(fd) pv.close_conn(fd)
params.incomplete_requests[fd] = http.Request{} params.request_done(fd)
return return
} }
if reader.total_read >= max_read { if reader.total_read >= max_read {
@ -379,37 +375,45 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
eprintln('[veb] error parsing request: too large') eprintln('[veb] error parsing request: too large')
fast_send_resp(mut conn, http_413) or {} fast_send_resp(mut conn, http_413) or {}
pv.close_conn(fd) pv.close_conn(fd)
params.incomplete_requests[fd] = http.Request{} params.request_done(fd)
return return
} }
} }
if params.idx[fd] == -1 {
// this is for sure a continuation of a previous request, where the first part contained only headers;
// make sure that we are ready to accept the body and account for every byte in it, by setting the counter to 0:
params.idx[fd] = 0
$if trace_handle_read ? {
eprintln('>>>>> fd: ${fd} | continuation of request, where the first part contained headers')
}
}
// check if the request has a body // check if the request has a body
content_length := req.header.get(.content_length) or { '0' } content_length := req.header.get(.content_length) or { '0' }
if content_length.int() > 0 { content_length_i := content_length.int()
if content_length_i > 0 {
mut max_bytes_to_read := max_read - reader.total_read mut max_bytes_to_read := max_read - reader.total_read
mut bytes_to_read := content_length.int() - params.idx[fd] mut bytes_to_read := content_length_i - params.idx[fd]
// cap the bytes to read to 8KB for the body, including the request headers if any // cap the bytes to read to 8KB for the body, including the request headers if any
if bytes_to_read > max_read - reader.total_read { if bytes_to_read > max_read - reader.total_read {
bytes_to_read = max_read - reader.total_read bytes_to_read = max_read - reader.total_read
} }
mut buf_ptr := params.buf mut buf_ptr := params.buf
unsafe { unsafe {
buf_ptr += fd * max_read // pointer magic buf_ptr += fd * max_read // pointer magic
} }
// convert to []u8 for BufferedReader // convert to []u8 for BufferedReader
mut buf := unsafe { buf_ptr.vbytes(max_bytes_to_read) } mut buf := unsafe { buf_ptr.vbytes(max_bytes_to_read) }
n := reader.read(mut buf) or { n := reader.read(mut buf) or {
if reader.total_read > 0 { if reader.total_read > 0 {
// the headers were parsed in this cycle, but the body has not been // The headers were parsed in this cycle, but the body has not been sent yet. No need to error.
// sent yet. No need to error params.idx[fd] = -1 // avoid reparsing the headers on the next call.
params.incomplete_requests[fd] = req
$if trace_handle_read ? {
eprintln('>>>>> fd: ${fd} | request headers were parsed, but the body has not been parsed yet | params.idx[fd]: ${params.idx[fd]} | content_length_i: ${content_length_i}')
}
return return
} }
eprintln('[veb] error reading request body: ${err}') eprintln('[veb] error reading request body: ${err}')
if err is io.Eof { if err is io.Eof {
// we expect more data to be send, but an Eof error occurred, meaning // we expect more data to be send, but an Eof error occurred, meaning
// that there is no more data to be read from the socket. // that there is no more data to be read from the socket.
@ -423,17 +427,14 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
).join(headers_close) ).join(headers_close)
)) or {} )) or {}
} }
pv.close_conn(fd) pv.close_conn(fd)
params.incomplete_requests[fd] = http.Request{} params.request_done(fd)
params.idx[fd] = 0
return return
} }
// there is no more data to be sent, but it is less than the Content-Length header // there is no more data to be sent, but it is less than the Content-Length header
// so it is a mismatch of body length and content length. // so it is a mismatch of body length and content length.
// Or if there is more data received then the Content-Length header specified // Or if there is more data received then the Content-Length header specified
if (n == 0 && params.idx[fd] != 0) || params.idx[fd] + n > content_length.int() { if (n == 0 && params.idx[fd] != 0) || params.idx[fd] + n > content_length_i {
fast_send_resp(mut conn, http.new_response( fast_send_resp(mut conn, http.new_response(
status: .bad_request status: .bad_request
body: 'Mismatch of body length and Content-Length header' body: 'Mismatch of body length and Content-Length header'
@ -442,29 +443,31 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
value: 'text/plain' value: 'text/plain'
).join(headers_close) ).join(headers_close)
)) or {} )) or {}
pv.close_conn(fd) pv.close_conn(fd)
params.incomplete_requests[fd] = http.Request{} params.request_done(fd)
params.idx[fd] = 0
return return
} else if n < bytes_to_read || params.idx[fd] + n < content_length.int() { } else if n < bytes_to_read || params.idx[fd] + n < content_length_i {
// request is incomplete wait until the socket becomes ready to read again // request is incomplete wait until the socket becomes ready to read again
params.idx[fd] += n
// TODO: change this to a memcpy function? // TODO: change this to a memcpy function?
req.data += buf[0..n].bytestr() req.data += buf[0..n].bytestr()
params.incomplete_requests[fd] = req params.incomplete_requests[fd] = req
params.idx[fd] += n
$if trace_handle_read ? {
eprintln('>>>>> request is NOT complete, fd: ${fd} | n: ${n} | req.data.len: ${req.data.len} | params.idx[fd]: ${params.idx[fd]}')
}
return return
} else { } else {
// request is complete: n = bytes_to_read // request is complete: n = bytes_to_read
params.idx[fd] += n
req.data += buf[0..n].bytestr() req.data += buf[0..n].bytestr()
params.idx[fd] += n
$if trace_handle_read ? {
eprintln('>>>>> request is NOW COMPLETE, fd: ${fd} | n: ${n} | req.data.len: ${req.data.len}')
}
} }
} }
defer { defer {
params.request_done(fd) params.request_done(fd)
} }
if completed_context := handle_request[A, X](mut conn, req, params) { if completed_context := handle_request[A, X](mut conn, req, params) {
if completed_context.takeover { if completed_context.takeover {
// the connection should be kept open, but removed from the picoev loop. // the connection should be kept open, but removed from the picoev loop.
@ -473,13 +476,11 @@ fn handle_read[A, X](mut pv picoev.Picoev, mut params RequestParams, fd int) {
pv.delete(fd) pv.delete(fd)
return return
} }
// TODO: At this point the Context can safely be freed when this function returns. // TODO: At this point the Context can safely be freed when this function returns.
// The user will have to clone the context if the context object should be kept. // The user will have to clone the context if the context object should be kept.
// defer { // defer {
// completed_context.free() // completed_context.free()
// } // }
match completed_context.return_type { match completed_context.return_type {
.normal { .normal {
// small optimization: if the response is small write it immediately // small optimization: if the response is small write it immediately

View file

@ -412,11 +412,41 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! {
string_buffer << `\t` string_buffer << `\t`
} }
`u` { `u` {
string_buffer << rune(strconv.parse_uint(decoder.json[ unicode_point := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position + string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!).bytes() string_index + 4], 16, 32)!)
string_index += 4 string_index += 4
if unicode_point < 0xD800 { // normal utf-8
string_buffer << unicode_point.bytes()
} else if unicode_point >= 0xDC00 { // trail surrogate -> invalid
decoder.decode_error('Got trail surrogate: ${u32(unicode_point):04X} before head surrogate.')!
} else { // head surrogate -> treat as utf-16
if string_index > string_info.length - 6 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
if decoder.json[string_info.position + string_index..
string_info.position + string_index + 2] != '\\u' {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')!
}
string_index += 2
unicode_point2 := rune(strconv.parse_uint(decoder.json[
string_info.position + string_index..string_info.position +
string_index + 4], 16, 32)!)
string_index += 4
if unicode_point2 < 0xDC00 {
decoder.decode_error('Expected a trail surrogate after a head surrogate, but got ${u32(unicode_point):04X}.')!
}
final_unicode_point := (unicode_point2 & 0x3FF) +
((unicode_point & 0x3FF) << 10) + 0x10000
string_buffer << final_unicode_point.bytes()
}
} }
else {} // has already been checked else {} // has already been checked
} }

View file

@ -10,3 +10,50 @@ fn test_decode_escaped_string() {
assert escaped_strings == decoded_strings assert escaped_strings == decoded_strings
} }
// test_surrogate checks that an escaped UTF-16 surrogate pair is decoded to the single
// code point that it encodes, both at the end of a string, and when followed by more text.
fn test_surrogate() {
	only_pair := decoder2.decode[string](r'"\ud83d\ude00"')!
	assert only_pair == '😀'

	pair_then_text := decoder2.decode[string](r'"\ud83d\ude00 text"')!
	assert pair_then_text == '😀 text'
}
// assert_surrogate_decode_error checks that decoding `input` as a string fails with a
// decoder2.JsonDecodeError, reported at line 1, character 1, with exactly `expected_message`.
// Unlike a bare `if err is decoder2.JsonDecodeError { ... }`, it also fails when the
// error has another type, instead of silently passing.
fn assert_surrogate_decode_error(input string, expected_message string) {
	if x := decoder2.decode[string](input) {
		assert false, 'decoding ${input} should have failed, but it returned: ${x}'
	} else {
		if err is decoder2.JsonDecodeError {
			assert err.line == 1
			assert err.character == 1
			assert err.message == expected_message
		} else {
			assert false, 'expected a decoder2.JsonDecodeError for ${input}, but got: ${err}'
		}
	}
}

// test_invalid_surrogate checks the errors, reported for malformed UTF-16 surrogate escapes.
fn test_invalid_surrogate() {
	// a head surrogate at the very end of the string
	assert_surrogate_decode_error(r'"\ud83d"', 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')
	// a head surrogate, followed by escapes, that are not `\u` ones
	assert_surrogate_decode_error(r'"\ud83d\n\n\n\n"', 'Data: Expected a trail surrogate after a head surrogate, but got no valid escape sequence.')
	// a head surrogate, followed by another head surrogate.
	// NOTE(review): both escapes here are D83D, so this case can not tell which of the
	// two code units is printed in the message.
	assert_surrogate_decode_error(r'"\ud83d\ud83d"', 'Data: Expected a trail surrogate after a head surrogate, but got D83D.')
	// a trail surrogate, that comes before a head surrogate
	assert_surrogate_decode_error(r'"\ude00\ud83d"', 'Data: Got trail surrogate: DE00 before head surrogate.')
}