From 721328ef5855b8589fc39fd926a7e06ef0b4c225 Mon Sep 17 00:00:00 2001 From: Dominik Pytlewski Date: Sun, 25 Sep 2022 21:54:46 +0200 Subject: [PATCH] os: fix os.read_file and os.read_bytes for 0 sized /proc/ files on Linux (fix #15852) (#15853) --- vlib/builtin/cfns.c.v | 2 +- vlib/os/os.c.v | 92 +++++++++++++++++++++++++--------------- vlib/os/os_test.v | 56 ++++++++++++++++++++++++ vlib/strings/builder.c.v | 11 +++++ 4 files changed, 126 insertions(+), 35 deletions(-) diff --git a/vlib/builtin/cfns.c.v b/vlib/builtin/cfns.c.v index f287f4d71c..23a6761dba 100644 --- a/vlib/builtin/cfns.c.v +++ b/vlib/builtin/cfns.c.v @@ -122,7 +122,7 @@ fn C.chdir(path &char) int fn C.rewind(stream &C.FILE) int -fn C.ftell(&C.FILE) int +fn C.ftell(&C.FILE) isize fn C.stat(&char, voidptr) int diff --git a/vlib/os/os.c.v b/vlib/os/os.c.v index 36306ab269..3b2f19d1e7 100644 --- a/vlib/os/os.c.v +++ b/vlib/os/os.c.v @@ -1,5 +1,7 @@ module os +import strings + #include // #include #include @@ -13,8 +15,6 @@ fn C.readlink(pathname &char, buf &char, bufsiz usize) int fn C.getline(voidptr, voidptr, voidptr) int -fn C.ftell(fp voidptr) i64 - fn C.sigaction(int, voidptr, int) int fn C.open(&char, int, ...int) int @@ -44,52 +44,76 @@ pub fn read_bytes(path string) ?[]u8 { defer { C.fclose(fp) } - cseek := C.fseek(fp, 0, C.SEEK_END) - if cseek != 0 { - return error('fseek failed') + fsize := find_cfile_size(fp)? + if fsize == 0 { + mut sb := slurp_file_in_builder(fp)? + return unsafe { sb.reuse_as_plain_u8_array() } } - fsize := C.ftell(fp) - if fsize < 0 { - return error('ftell failed') - } - len := int(fsize) - // On some systems C.ftell can return values in the 64-bit range - // that, when cast to `int`, can result in values below 0. 
- if i64(len) < fsize { - return error('$fsize cast to int results in ${int(fsize)})') - } - C.rewind(fp) - mut res := []u8{len: len} - nr_read_elements := int(C.fread(res.data, len, 1, fp)) + mut res := []u8{len: fsize} + nr_read_elements := int(C.fread(res.data, 1, fsize, fp)) if nr_read_elements == 0 && fsize > 0 { return error('fread failed') } - res.trim(nr_read_elements * len) + res.trim(nr_read_elements) return res } +fn find_cfile_size(fp &C.FILE) ?int { + // NB: Musl's fseek returns -1 for virtual files, while Glibc's fseek returns 0 + cseek := C.fseek(fp, 0, C.SEEK_END) + raw_fsize := C.ftell(fp) + if raw_fsize != 0 && cseek != 0 { + return error('fseek failed') + } + if cseek != 0 && raw_fsize < 0 { + return error('ftell failed') + } + len := int(raw_fsize) + // For files > 2GB, C.ftell can return values that, when cast to `int`, can result in values below 0. + if i64(len) < raw_fsize { + return error('int($raw_fsize) cast results in $len') + } + C.rewind(fp) + return len +} + +const buf_size = 4096 + +// slurp_file_in_builder reads an entire file into a strings.Builder chunk by chunk, without relying on its file size. +// It is intended for reading 0 sized files, or dynamic files in a virtual filesystem like /proc/cpuinfo. +// For these, we cannot allocate all memory in advance (since we do not know the final size), and so we have no choice +// but to read the file in `buf_size` chunks. +[manualfree] +fn slurp_file_in_builder(fp &C.FILE) ?strings.Builder { + buf := [os.buf_size]u8{} + mut sb := strings.new_builder(os.buf_size) + for { + mut read_bytes := fread(&buf[0], 1, os.buf_size, fp) or { + if err is none { + break + } + unsafe { sb.free() } + return err + } + unsafe { sb.write_ptr(&buf[0], read_bytes) } + } + return sb +} + // read_file reads the file in `path` and returns the contents. +[manualfree] pub fn read_file(path string) ?string { mode := 'rb' mut fp := vfopen(path, mode)? 
defer { C.fclose(fp) } - cseek := C.fseek(fp, 0, C.SEEK_END) - if cseek != 0 { - return error('fseek failed') - } - fsize := C.ftell(fp) - if fsize < 0 { - return error('ftell failed') - } - // C.fseek(fp, 0, SEEK_SET) // same as `C.rewind(fp)` below - C.rewind(fp) - allocate := int(fsize) - // On some systems C.ftell can return values in the 64-bit range - // that, when cast to `int`, can result in values below 0. - if i64(allocate) < fsize { - return error('$fsize cast to int results in ${int(fsize)})') + allocate := find_cfile_size(fp)? + if allocate == 0 { + mut sb := slurp_file_in_builder(fp)? + res := sb.str() + unsafe { sb.free() } + return res } unsafe { mut str := malloc_noscan(allocate + 1) diff --git a/vlib/os/os_test.v b/vlib/os/os_test.v index c0434dd733..1191c26d3a 100644 --- a/vlib/os/os_test.v +++ b/vlib/os/os_test.v @@ -47,6 +47,29 @@ fn test_open_file() { os.rm(filename) or { panic(err) } } +fn test_read_file_from_virtual_filesystem() { + $if linux { + mounts := os.read_file('/proc/mounts')? + + // it is not empty, contains some mounting such as root filesystem: /dev/x / ext4 rw 0 0 + assert mounts.len > 20 + assert mounts.contains('/') + assert mounts.contains(' ') + } +} + +fn test_read_binary_from_virtual_filesystem() { + $if linux { + mounts_raw := os.read_bytes('/proc/mounts')? + mounts := mounts_raw.bytestr() + + // it is not empty, contains some mounting such as root filesystem: /dev/x / ext4 rw 0 0 + assert mounts.len > 20 + assert mounts.contains('/') + assert mounts.contains(' ') + } +} + fn test_open_file_binary() { filename := './test1.dat' hello := 'hello \n world!' @@ -870,3 +893,36 @@ fn test_command() { // dump( cmd_to_fail ) assert cmd_to_fail.exit_code != 0 // 2 on linux, 1 on macos } + +fn test_reading_from_proc_cpuinfo() { + // This test is only for plain linux systems (they have a /proc virtual filesystem). 
+ $if android { + assert true + return + } + $if !linux { + assert true + return + } + info := os.read_file('/proc/cpuinfo')? + assert info.len > 0 + assert info.contains('processor') + assert info.ends_with('\n\n') + + info_bytes := os.read_bytes('/proc/cpuinfo')? + assert info_bytes.len > 0 + assert info.len == info_bytes.len +} + +fn test_reading_from_empty_file() { + empty_file := os.join_path_single(tfolder, 'empty_file.txt') + os.rm(empty_file) or {} + assert !os.exists(empty_file) + os.write_file(empty_file, '')? + assert os.exists(empty_file) + content := os.read_file(empty_file)? + assert content.len == 0 + content_bytes := os.read_bytes(empty_file)? + assert content_bytes.len == 0 + os.rm(empty_file)? +} diff --git a/vlib/strings/builder.c.v b/vlib/strings/builder.c.v index 34c3afdfc4..39b16211e8 100644 --- a/vlib/strings/builder.c.v +++ b/vlib/strings/builder.c.v @@ -16,6 +16,17 @@ pub fn new_builder(initial_size int) Builder { return res } +// reuse_as_plain_u8_array allows using the Builder instance as a plain []u8 return value. +// It is useful, when you have accumulated data in the builder, that you want to +// pass/access as []u8 later, without copying or freeing the buffer. +// NB: you *should NOT use* the string builder instance after calling this method. +// Use only the return value after calling this method. +[unsafe] +pub fn (mut b Builder) reuse_as_plain_u8_array() []u8 { + unsafe { b.flags.clear(.noslices) } + return *b +} + // write_ptr writes `len` bytes provided byteptr to the accumulated buffer [unsafe] pub fn (mut b Builder) write_ptr(ptr &u8, len int) {