mirror of
https://github.com/vlang/v.git
synced 2025-09-13 22:42:26 +03:00
vlib: add archive.tar
module to enable reading of .tar and .tar.gz files (#24995)
This commit is contained in:
parent
b876644e82
commit
a8d75c10b5
13 changed files with 969 additions and 0 deletions
166
examples/archive/tar_gz_reader.v
Normal file
166
examples/archive/tar_gz_reader.v
Normal file
|
@ -0,0 +1,166 @@
|
|||
import archive.tar
|
||||
import flag
|
||||
import net.http
|
||||
import os
|
||||
import term
|
||||
|
||||
const default_url = 'https://github.com/vlang/v/archive/refs/tags/v0.1.3.tar.gz'
|
||||
|
||||
// Context holds the parsed command line options of the example program.
@[heap]
struct Context {
	url        string // web URL starting with http:// or https://; local starting with file:///
	chunks     bool   // true: decompress with the chunked callback API instead of all at once
	debug      int    // verbosity for block printing: 0 = silent, higher prints more block kinds
	max_blocks int    // if max_blocks > 0 and is reached stops early.
	filename   string // if filename is found as a path of a data block, stops early.
}
|
||||
|
||||
// read_last_block returns true (and flags stop_early) once the configured
// max_blocks limit has been exceeded; returns false when no limit is set.
fn (ctx &Context) read_last_block(mut read tar.Read) bool {
	if ctx.max_blocks <= 0 {
		return false
	}
	if read.get_block_number() > ctx.max_blocks {
		read.stop_early = true
		return true
	}
	return false
}
|
||||
|
||||
// new_context parses the command line flags into a Context.
// Returns an error when flag parsing fails in `fp.finalize()`.
fn new_context() !&Context {
	mut fp := flag.new_flag_parser(os.args)
	fp.application('tar_gz_reader')
	fp.version('0.0.20250721')
	fp.description('Reads into memory selected sections of *.tar.gz. archives from https or home_dir.')
	fp.skip_executable()
	ctx := &Context{
		url:        fp.string('url', `u`, default_url, 'archive *.tar.gz URL, default(${default_url}). Start name with file:/// for local')
		chunks:     fp.bool('chunks', `c`, false, 'decompress with chunks to reduce RAM usage, default(false)')
		debug:      fp.int('debug', `d`, 0, 'prints blocks: 1=other, 2:+dirs, 3=+files, 4=+data, default(0=silent)')
		max_blocks: fp.int('max_blocks', `m`, 0, 'maximum blocks to read, stop early. Default(0=read all)')
		filename:   fp.string('filename', `f`, '', 'filename content complete print, stop early. Default(empty means none)')
	}
	// leftover (non-flag) arguments are reported but not treated as an error
	additional := fp.finalize()!
	if additional.len > 0 {
		println('unprocessed args ${additional.join_lines()}')
	}
	return ctx
}
|
||||
|
||||
// Downloader downloads a *.tar.gz using HTTP chunks
struct Downloader {
mut:
	chunks int  // number of HTTP chunks received via on_chunk
	data   []u8 // accumulated (still compressed) archive bytes
}

// new_downloader fetches the archive at `url` into memory.
// http:// and https:// URLs are downloaded with chunk callbacks;
// file:/// URLs are read from a path resolved under the user's home dir.
// NOTE(review): any other scheme silently returns an empty Downloader —
// confirm whether that should be an error.
fn new_downloader(url string) !&Downloader {
	mut downloader := &Downloader{}
	params := http.DownloaderParams{
		downloader: downloader
	}
	if url.starts_with('http://') || url.starts_with('https://') {
		http.download_file_with_progress(url, '', params)!
	} else if url.starts_with('file:///') {
		// strip the 8-char 'file:///' prefix and resolve under home_dir
		path := '${os.home_dir()}/${url[8..]}'
		println('path ${path}')
		downloader.data = os.read_bytes(path)!
	}
	return downloader
}
|
||||
|
||||
// on_start implements the http downloader callback; nothing to prepare here.
fn (mut d Downloader) on_start(mut request http.Request, path string) ! {}

// on_chunk accumulates each received HTTP chunk into d.data.
// Chunks are ignored while the expected total size is 0 (unknown).
fn (mut d Downloader) on_chunk(request &http.Request, chunk []u8, already_received u64, expected u64) ! {
	if expected == 0 {
		return
	}
	d.chunks++
	d.data << chunk
}

// on_finish implements the http downloader callback; nothing to clean up here.
fn (mut d Downloader) on_finish(request &http.Request, response &http.Response) ! {}
|
||||
|
||||
// FileReader implements tar.Reader: it prints blocks according to
// ctx.debug and captures the content of the file named by ctx.filename.
struct FileReader implements tar.Reader {
	ctx &Context
mut:
	filepath string // full archive path of the first file matching ctx.filename
	content  []u8   // collected data blocks of that file
}

// new_file_reader returns a FileReader bound to the given Context.
fn new_file_reader(ctx &Context) FileReader {
	return FileReader{
		ctx: ctx
	}
}
|
||||
|
||||
// other_block prints special (non dir/file/data) blocks when debug >= 1.
fn (mut f FileReader) other_block(mut read tar.Read, details string) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	if f.ctx.debug <= 0 {
		return
	}
	msg := 'OTHER block:${read.get_block_number():6} ${read.get_special()} ${details} ${read.get_path()} '
	println(term.colorize(term.bright_yellow, msg))
}
|
||||
|
||||
// dir_block prints directory blocks when debug >= 2.
fn (mut f FileReader) dir_block(mut read tar.Read, size u64) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	if f.ctx.debug <= 1 {
		return
	}
	msg := 'DIR block:${read.get_block_number():6} ${read.get_path()} size:${size}'
	println(term.colorize(term.green, msg))
}
|
||||
|
||||
// file_block prints file header blocks when debug >= 3 and remembers the
// first path that ends with the requested ctx.filename.
fn (mut f FileReader) file_block(mut read tar.Read, size u64) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	path := read.get_path()
	if f.ctx.debug > 2 {
		msg := ' FILE block:${read.get_block_number():6} ${path} size:${size}'
		println(term.colorize(term.bright_blue, msg))
	}
	wanted := f.ctx.filename != '' && f.filepath == ''
	if wanted && path.ends_with(f.ctx.filename) {
		f.filepath = path
	}
}
|
||||
|
||||
// data_block prints data blocks when debug >= 4 and collects the bytes of
// the matched file; reading stops once that file is complete.
fn (mut f FileReader) data_block(mut read tar.Read, data []u8, pending int) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	path := read.get_path()
	if f.ctx.debug > 3 {
		println(' DATA block:${read.get_block_number():6} ${path} len:${data.len} pend:${pending}')
	}
	if f.ctx.filename == '' {
		return
	}
	if f.filepath != path {
		return
	}
	f.content << data
	if pending == 0 {
		// our file of interest data is complete
		read.stop_early = true
	}
}
|
||||
|
||||
// main parses the flags, downloads (or reads) the archive, decompresses it
// either in chunks or all at once, then prints a summary and the content of
// the requested file, if any was matched.
fn main() {
	ctx := new_context()!
	reader := FileReader{
		ctx: ctx
	}
	mut untar := tar.new_untar(reader)
	mut decompressor := tar.new_decompresor(untar)
	downloader := new_downloader(ctx.url)!
	if ctx.chunks {
		decompressor.read_chunks(downloader.data)!
	} else {
		decompressor.read_all(downloader.data)!
	}
	println('-'.repeat(80))
	println('Download: ${ctx.url} chunks:${downloader.chunks} bytes=${downloader.data.len}')
	println('Untar: ${untar}')
	println('Content: Path:${reader.filepath} bytes:${reader.content.len}')
	println('-'.repeat(80))
	println('${reader.content.bytestr()}')
	println('-'.repeat(80))
}
|
3
vlib/archive/README.md
Normal file
3
vlib/archive/README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
## Description
|
||||
|
||||
`archive` is a namespace for different archive formats like `tar` or `zip`.
|
33
vlib/archive/tar/README.md
Normal file
33
vlib/archive/tar/README.md
Normal file
|
@ -0,0 +1,33 @@
|
|||
## Description
|
||||
|
||||
`tar` is a module to access tar archives.
|
||||
|
||||
Tape archives (tar) are a file format for storing a sequence of files that can be read and written
|
||||
as streams. This module covers the reading of the basic sections of archives produced by GNU tools
|
||||
like the Linux command `tar -xvf`, but in memory instead of modifying the filesystem. Parses directories,
|
||||
files, and file contents, and manages paths longer than 100 chars.
|
||||
|
||||
### Read Efficiency
|
||||
|
||||
An entire tar file can be read in memory or by chunks. Keeps in memory a single decompressed
|
||||
[chunk](https://modules.vlang.io/compress.gzip.html#decompress_with_callback) of 32 KB at a time
|
||||
and also keeps in memory a single tar block of 512 bytes at a time. Paths are not converted to strings until
|
||||
needed, and the user's reader implementation can stop the reading process early.
|
||||
|
||||
### Read Example
|
||||
|
||||
The tar blocks are parsed and some fields are passed to `Reader` implemented methods.
|
||||
|
||||
```v
|
||||
import os
|
||||
import archive.tar
|
||||
|
||||
fn main() {
|
||||
os.chdir(@VMODROOT) or {}
|
||||
path := 'archive/tar/testdata/life.tar.gz'
|
||||
reader := tar.new_debug_reader()
|
||||
tar.read_tar_gz_file(path, reader)!
|
||||
}
|
||||
```
|
||||
Look also in `examples` folder the `tar_gz_reader.v` program.
|
||||
|
277
vlib/archive/tar/reader.v
Normal file
277
vlib/archive/tar/reader.v
Normal file
|
@ -0,0 +1,277 @@
|
|||
module tar
|
||||
|
||||
import compress.gzip
|
||||
import os
|
||||
|
||||
// read_tar_gz_file decompresses a given local *.tar.gz file and reads all of
// its blocks with the given reader.
pub fn read_tar_gz_file(path string, reader Reader) ! {
	compressed := os.read_bytes(path)!
	blocks := gzip.decompress(compressed)!
	mut untar := Untar{
		reader: reader
	}
	untar.read_all_blocks(blocks)!
}
|
||||
|
||||
// Read is used by Untar to call Reader implemented methods.
// The implementor can read the block's `get_block_number()` and `get_path()`
// and can set the field `stop_early` to true to suspend the reading.
pub struct Read {
mut:
	block_number int          // consecutive 1-based block counter
	special      BlockSpecial // classification of the last parsed block
	prefix_len   int          // used bytes in prefix_buf
	prefix_buf   [131]u8      // ustar `prefix` field bytes (no NUL terminator kept)
	separator    bool         // insert '/' between prefix and path in get_path()
	path_len     int          // used bytes in path_buf
	path_buf     [100]u8      // ustar `name` field bytes (no NUL terminator kept)

	long_path &LongPath = unsafe { nil } // when not nil, overrides the short path
pub mut:
	stop_early bool // set by the Reader implementation to suspend reading
}
|
||||
|
||||
// set_short_path sets Read path with the tar block strings `prefix` and `path`.
// Block's `prefix` C string max length is 131 but most of the time is 0.
// Block's `path` C string max length is 100. Both `prefix` and `path` are
// kept as raw bytes and only converted to a V string when needed, see `get_path()`.
fn (mut b Read) set_short_path(buffer [512]u8, separator_after_prefix bool) {
	// first check if TAR block has a prefix string (0 to 131 chars). The
	// prefix will be other than '' when the TAR block filepath len is > 100.
	b.prefix_len = 0
	for i := 345; i < 345 + 131; i++ {
		letter := buffer[i]
		if letter == 0 {
			break // first 0 found means prefix C string is complete.
		}
		b.prefix_buf[b.prefix_len] = letter
		b.prefix_len++
	}

	b.separator = separator_after_prefix

	// most of the time there is path for blocks like dirs and regular files:
	b.path_len = 0
	for i := 0; i < 100; i++ {
		letter := buffer[i]
		if letter == 0 {
			break // first 0 found means path C string is complete.
		}
		b.path_buf[b.path_len] = letter
		b.path_len++
	}
}
|
||||
|
||||
// set_long_path sets Read path with the long path reference; while set it
// takes precedence over the short prefix/path buffers in `get_path()`.
fn (mut b Read) set_long_path(long_path &LongPath) {
	b.long_path = unsafe { long_path }
}
|
||||
|
||||
// get_path returns the path of this read. The path is valid for blocks of
// types directory, file and file data.
pub fn (b Read) get_path() string {
	// a registered long path overrides the short prefix/path buffers
	if b.long_path != unsafe { nil } {
		return b.long_path.get_path()
	}

	mut joined := []u8{}
	if b.prefix_len > 0 {
		joined << b.prefix_buf[0..b.prefix_len]
		if b.separator {
			joined << `/`
		}
	}
	if b.path_len > 0 {
		joined << b.path_buf[0..b.path_len]
	}
	return joined.bytestr()
}
|
||||
|
||||
// get_block_number returns the consecutive (1-based) number of this read.
pub fn (b Read) get_block_number() int {
	return b.block_number
}

// get_special returns the special type of the Read.
pub fn (b Read) get_special() BlockSpecial {
	return b.special
}

// str returns a string representation with block number, path, special type and stop early.
pub fn (r Read) str() string {
	return '(block_number:${r.block_number} path:${r.get_path()} special:${r.special} stop_early:${r.stop_early})'
}
|
||||
|
||||
// Reader is implemented by users and called by Untar while parsing blocks.
pub interface Reader {
mut:
	// dir_block is called when untar reads a block of type directory.
	// Call `Read.get_path()` to get the full name of the directory.
	// `size` field is zero for directories.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	dir_block(mut read Read, size u64)

	// file_block is called when untar reads a block of type filename.
	// Call `Read.get_path()` to get the full name of the file.
	// `size` is the expected file size in bytes to be read later.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	file_block(mut read Read, size u64)

	// data_block is called when untar reads a block of type filedata.
	// Call `Read.get_path()` to get the full name of the file data belongs to.
	// The `data` size is 512 bytes or less. `pending` indicates how many bytes are left to read.
	// The implementor can inspect the data and use the pending value
	// to set Read's field `stop_early` to suspend the reader.
	data_block(mut read Read, data []u8, pending int)

	// other_block is called when untar reads a block type other than directory,
	// filename or filedata. `Read.get_special()` and `details` give more info
	// about the block, e.g. `block device` or `FIFO`.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	other_block(mut read Read, details string)
}
|
||||
|
||||
// DebugReader implements a Reader and prints rows for blocks read
// as directories, files, file data blocks and special blocks.
pub struct DebugReader implements Reader {
}

// new_debug_reader returns a DebugReader
pub fn new_debug_reader() &DebugReader {
	return &DebugReader{}
}

// dir_block prints one row per directory block.
fn (mut t DebugReader) dir_block(mut read Read, size u64) {
	println('DIR #${read.get_block_number()} ${read.get_path()}')
}

// file_block prints one row per file header block.
fn (mut t DebugReader) file_block(mut read Read, size u64) {
	println('FILE #${read.get_block_number()} path:${read.get_path()} size:${size}')
}

// data_block prints one row per file data block.
fn (mut t DebugReader) data_block(mut read Read, data []u8, pending int) {
	println('DATA #${read.get_block_number()} ${read.get_path()} size:${data.len} pending:${pending}')
}

// other_block prints one row per special block.
fn (mut t DebugReader) other_block(mut read Read, details string) {
	println('OTHER #${read.get_block_number()} special:${read.special} ${details}')
}
|
||||
|
||||
// ReadResult is returned by ReadResultFn
pub enum ReadResult {
	@continue   // keep reading the next block
	stop_early  // a Reader implementation requested to stop
	end_of_file // all provided blocks were read
	end_archive // two consecutive blank blocks were found
	overflow    // internal buffer overflow (should not occur)
}

// ReadResultFn processes a single 512 byte tar block.
type ReadResultFn = fn (block []u8) !ReadResult
|
||||
|
||||
@[heap]
pub struct Decompressor {
mut:
	untar &Untar // receives the decompressed 512 byte blocks
}

// new_decompresor returns a Decompressor to decompress a tar.gz file.
// A given Untar with a registered Reader will read the blocks.
// NOTE(review): the name is misspelled ("decompresor"), but it is the public
// API name and must stay for backward compatibility.
pub fn new_decompresor(untar &Untar) &Decompressor {
	return &Decompressor{
		untar: untar
	}
}
|
||||
|
||||
// read_all decompresses the given `tar_gz` array with all the tar blocks.
// Then calls the untar method `read_all_blocks` to read all the blocks at once.
// A read result is returned which can be of the type stop early, or an error.
pub fn (mut d Decompressor) read_all(tar_gz []u8) !ReadResult {
	all_blocks := gzip.decompress(tar_gz)!
	return d.untar.read_all_blocks(all_blocks)!
}
|
||||
|
||||
// read_chunks decompresses the given `tar_gz` array by chunks of
// 32768 bytes which can hold up to 64 tar blocks of 512 bytes each.
// Then calls the untar method read_single_block via the ChunksReader dispatcher.
// A read result is returned which can be of the type stop early, or an error.
pub fn (mut d Decompressor) read_chunks(tar_gz []u8) !ReadResult {
	mut reader := &ChunksReader{
		read_block_fn: d.untar.read_single_block
	}
	// returning 0 from the callback makes gzip suspend the decompression
	callback := fn (chunk []u8, mut reader ChunksReader) int {
		result := reader.read_blocks(chunk)
		if result == .continue {
			return chunk.len // go for more
		}
		return 0 // suspend
	}
	gzip.decompress_with_callback(tar_gz, callback, reader) or {
		// only propagate the error when the reader did not itself request
		// the suspension (stop_early / end_archive are expected outcomes)
		if reader.result == .continue {
			return err
		}
		return reader.result
	}
	return reader.result
}
|
||||
|
||||
// ChunksReader has a reusable fixed buffer with maximum length of decompressed chunk
// of 32768 bytes plus a maximum previous pending tar block of 512 bytes.
struct ChunksReader {
mut:
	read_block_fn  ReadResultFn = unsafe { nil } // receives each complete 512 byte block
	buffer         [32768 + 512]u8 // leftover tail + newest chunk
	chunks_counter int        // number of chunks received so far
	pending        int        // position of the last not sent buffer byte
	result         ReadResult // last result returned by read_block_fn
}
|
||||
|
||||
// read_blocks receives a decompressed chunk (typically 32 KB) from the gzip
// decompressor. The chunk is assumed to be a TAR archive section: it is
// appended to the internal buffer after any bytes left over from the previous
// call, then cut into 512 byte blocks that are sent to the untar reader one
// by one. The untar reader result informs this process to continue or stop
// early. The remaining bytes of an incomplete block are kept and prepended
// to the next chunk's cuts.
fn (mut d ChunksReader) read_blocks(chunk []u8) ReadResult {
	d.chunks_counter++
	total := d.pending + chunk.len
	if total > d.buffer.len {
		assert false, 'Should not occur buffer overflow ${total}'
		return .overflow
	}

	// append new chunk after previous incomplete block bytes not sent yet
	for i, ch in chunk {
		d.buffer[i + d.pending] = ch
	}
	d.pending += chunk.len

	mut cut := 0
	for {
		if cut + 512 > d.pending {
			// after sending all complete blocks move the remaining not sent
			// bytes to the start of the reused buffer to be prepended before
			// the next chunk. BUGFIX: the destination index must advance with
			// the copy (i - cut); the previous code wrote every byte to the
			// single index `cut - 512`, which is negative when cut == 0 and
			// corrupts the pending tail otherwise.
			for i := cut; i < d.pending; i++ {
				d.buffer[i - cut] = d.buffer[i]
			}
			d.pending -= cut
			return .continue
		}

		// send a complete 512 byte block
		block := d.buffer[cut..cut + 512]
		cut += 512
		d.result = d.read_block_fn(block) or {
			assert false, 'Should not occur buffer overflow'
			return .overflow
		}
		match d.result {
			.continue {
				// try next cut or leave a remaining
			}
			else {
				break // untar error or stop_early
			}
		}
	}
	return d.result
}
|
157
vlib/archive/tar/reader_test.v
Normal file
157
vlib/archive/tar/reader_test.v
Normal file
|
@ -0,0 +1,157 @@
|
|||
module tar
|
||||
|
||||
import os
|
||||
|
||||
// testsuite_begin runs once before the tests; the testdata paths below are
// relative to the module root.
fn testsuite_begin() {
	os.chdir(@VMODROOT) or {}
}

// testdata is the folder holding the *.tar and *.tar.gz fixtures.
const testdata = 'vlib/archive/tar/testdata'
|
||||
|
||||
// files copied from golang: https://github.com/golang/go/blob/master/src/archive/tar/testdata/file-and-dir.tar
// test_golang_testdata checks dirs, files, content and special blocks against
// fixtures borrowed from the Go standard library tar testsuite.
fn test_golang_testdata() {
	// [ ] dir | 0 bytes | folder
	// [ ] small.txt | 5 bytes | file
	r1 := new_test_reader('file-and-dir.tar', false)!
	assert r1.dirs[0] == 'dir/'
	assert r1.files['small.txt'] == 5
	assert r1.data['small.txt'] == 'Kilts'.bytes()
	assert r1.other[0] == 'block:4 special:blank_1 continue'
	assert r1.other[1] == 'block:5 special:blank_2 end_archive'

	// [ ] small.txt | 5 bytes | file
	// [ ] small2.txt | 11 bytes | file
	r2 := new_test_reader('gnu.tar', false)!
	assert r2.dirs.len == 0
	assert r2.files['small.txt'] == 5
	assert r2.files['small2.txt'] == 11
	assert r2.data['small.txt'] == 'Kilts'.bytes()
	assert r2.data['small2.txt'] == 'Google.com\n'.bytes()

	// [ ] h1<?><?><?><?>bye | 0 bytes — non-UTF8 bytes in the file name
	r3 := new_test_reader('gnu-not-utf8.tar', false)!
	r3_filename := [u8(`h`), `i`, 0x80, 0x81, 0x82, 0x83, `b`, `y`, `e`].bytestr()
	r3_file_len := r3.files[r3_filename] or { assert false, 'file not found: ${r3_filename}' }
	assert r3_file_len == 0
	assert r3.other.len == 2

	// [ ] 0123456789 | 0 bytes — GNU long-name record padded with NULs
	r4 := new_test_reader('gnu-long-nul.tar', false)!
	assert r4.dirs.len == 0
	r4_filename := '0123456789'
	r4_file_len := r4.files[r4_filename] or {
		assert false, 'file ${r4_filename} not found in ${r4.files.keys()}'
	}
	assert r4_file_len == 0
	assert r4.other[0] == 'block:1 special:long_name size:161'
	assert r4.other[1] == 'block:2 special:long_name data_part:161'

	// [ ] ☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹ | 0 bytes
	r5 := new_test_reader('gnu-utf8.tar', false)!
	r5_filename := '☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹'
	r5_file_len := r5.files[r5_filename] or { assert false, 'file not found: ${r5_filename}' }
	assert r5_file_len == 0
	assert r5.other[0] == 'block:1 special:long_name size:163'
	assert r5.other[1] == 'block:2 special:long_name data_part:163'
}
|
||||
|
||||
// test_long_long_short checks a long path (human), followed by another long
// path (chimp), followed by a normal short path (cat).
fn test_long_long_short() {
	r1 := new_test_reader_gz('life.tar.gz', false)!

	mammal := 'life/Animalia/Chordata/Mammalia'
	human := '${mammal}/Primates_Haplorhini_Simiiformes/Hominidae_Homininae_Hominini/Homo/Homo sapiens.txt'
	chimp := '${mammal}/Primates_Haplorhini_Simiiformes/Hominidae_Homininae_Hominini/Pan/Pan troglodytes.txt'
	cat := '${mammal}/Carnivora_Feliformia/Felidae_Felinae/Felis/Felis catus.txt'
	// long paths exercise the GNU `L` long-name blocks; cat fits in 100 chars
	assert human.len > 100
	assert chimp.len > 100
	assert cat.len <= 100
	assert r1.files[human] == 35
	assert r1.files[chimp] == 40
	assert r1.files[cat] == 33
	assert r1.texts[human] == 'https://en.wikipedia.org/wiki/Human'
	assert r1.texts[chimp] == 'https://en.wikipedia.org/wiki/Chimpanzee'
	assert r1.texts[cat] == 'https://en.wikipedia.org/wiki/Cat'
}
|
||||
|
||||
// TestReader implements Reader and records everything it sees for assertions.
struct TestReader {
	debug bool // true: also print each callback
mut:
	dirs  []string          // directory paths in read order
	files map[string]u64    // file path -> declared size
	data  map[string][]u8   // file path -> collected content bytes
	texts map[string]string // file path -> collected content as string
	other []string          // formatted rows for special blocks

	last_file string // path of the file currently being collected
	last_data []u8   // bytes collected so far for last_file
}
|
||||
|
||||
// new_test_reader reads files *.tar from the testdata folder.
fn new_test_reader(tar_file string, debug bool) !&TestReader {
	mut reader := &TestReader{
		debug: debug
	}
	mut untar := Untar{
		reader: reader
	}
	all_blocks := os.read_bytes('${testdata}/${tar_file}')!
	untar.read_all_blocks(all_blocks)!
	return reader
}

// new_test_reader_gz reads files *.tar.gz from the testdata folder.
fn new_test_reader_gz(tar_gz_file string, debug bool) !&TestReader {
	mut reader := &TestReader{
		debug: debug
	}
	mut untar := Untar{
		reader: reader
	}
	mut decompressor := new_decompresor(untar)
	tar_gz := os.read_bytes('${testdata}/${tar_gz_file}')!
	decompressor.read_all(tar_gz)!

	return reader
}
|
||||
|
||||
// dir_block records the directory path.
fn (mut t TestReader) dir_block(mut read Read, size u64) {
	t.dirs << read.get_path()
	if t.debug {
		println('DIR #${read.get_block_number()} ${read.get_path()}')
	}
}

// file_block records the file path and its declared size.
fn (mut t TestReader) file_block(mut read Read, size u64) {
	t.last_file = read.get_path()
	t.files[t.last_file] = size
	if t.debug {
		println('FILE #${read.get_block_number()} ${read.get_path()}')
	}
}

// data_block accumulates data for the current file; when pending reaches 0
// the complete content is stored in both data (bytes) and texts (string).
fn (mut t TestReader) data_block(mut read Read, data []u8, pending int) {
	path := read.get_path()
	if t.debug {
		println('DATA #${read.get_block_number()} ${path}')
	}
	if t.last_file == path {
		t.last_data << data
		if pending == 0 {
			t.data[t.last_file] = t.last_data.clone()
			t.texts[path] = t.last_data.bytestr()
			if t.debug {
				println('TEXT #${read.get_block_number()} ${t.last_data.bytestr()}')
			}
			t.last_file = ''
			t.last_data.clear()
		}
	}
}

// other_block records a formatted row for every special block.
fn (mut t TestReader) other_block(mut read Read, details string) {
	t.other << 'block:${read.block_number} special:${read.special} ${details}'
	if t.debug {
		println('OTHER #${read.get_block_number()} special:${read.special} ${details}')
	}
}
|
43
vlib/archive/tar/tar.v
Normal file
43
vlib/archive/tar/tar.v
Normal file
|
@ -0,0 +1,43 @@
|
|||
module tar
|
||||
|
||||
// ustar header block octets
|
||||
// Field | Offset | Length
|
||||
// --------------------------
|
||||
// name | 0 | 100
|
||||
// mode | 100 | 8
|
||||
// uid | 108 | 8
|
||||
// gid | 116 | 8
|
||||
// size | 124 | 12
|
||||
// mtime | 136 | 12
|
||||
// chksum | 148 | 8
|
||||
// typeflag | 156 | 1
|
||||
// linkname | 157 | 100
|
||||
// magic | 257 | 6
|
||||
// version | 263 | 2
|
||||
// uname | 265 | 32
|
||||
// gname | 297 | 32
|
||||
// devmajor | 329 | 8
|
||||
// devminor | 337 | 8
|
||||
// prefix | 345 | 155
|
||||
|
||||
// BlockHeader is the ustar `typeflag` byte at offset 156 of a header block.
pub enum BlockHeader as u8 {
	file      = u8(`0`) // 0x30
	hard_link = u8(`1`) // 0x31
	sym_link  = u8(`2`) // 0x32
	char_dev  = u8(`3`) // 0x33
	block_dev = u8(`4`) // 0x34
	dir       = u8(`5`) // 0x35
	fifo      = u8(`6`) // 0x36
	long_name = u8(`L`) // 0x4c = 76 dec
	global    = u8(`g`) // 0x67 pax
}

// BlockSpecial classifies blocks that are not plain dir/file/data blocks.
pub enum BlockSpecial {
	no        // for headers `0`,`5` or data blocks
	blank_1   // first blank block: continue
	blank_2   // second blank block: end of archive
	ignore    // for headers `1`, `2`, `3`, `4`, `6`
	long_name // for header `L`
	global    // for header `g`
	unknown   // for not header defined
}
|
BIN
vlib/archive/tar/testdata/file-and-dir.tar
vendored
Normal file
BIN
vlib/archive/tar/testdata/file-and-dir.tar
vendored
Normal file
Binary file not shown.
BIN
vlib/archive/tar/testdata/gnu-long-nul.tar
vendored
Normal file
BIN
vlib/archive/tar/testdata/gnu-long-nul.tar
vendored
Normal file
Binary file not shown.
BIN
vlib/archive/tar/testdata/gnu-not-utf8.tar
vendored
Normal file
BIN
vlib/archive/tar/testdata/gnu-not-utf8.tar
vendored
Normal file
Binary file not shown.
BIN
vlib/archive/tar/testdata/gnu-utf8.tar
vendored
Normal file
BIN
vlib/archive/tar/testdata/gnu-utf8.tar
vendored
Normal file
Binary file not shown.
BIN
vlib/archive/tar/testdata/gnu.tar
vendored
Normal file
BIN
vlib/archive/tar/testdata/gnu.tar
vendored
Normal file
Binary file not shown.
BIN
vlib/archive/tar/testdata/life.tar.gz
vendored
Normal file
BIN
vlib/archive/tar/testdata/life.tar.gz
vendored
Normal file
Binary file not shown.
290
vlib/archive/tar/untar.v
Normal file
290
vlib/archive/tar/untar.v
Normal file
|
@ -0,0 +1,290 @@
|
|||
module tar
|
||||
|
||||
// Untar uses a reader to parse the contents of a unix tar file.
// Reuses a fixed array of 512 bytes to parse each TAR block.
@[heap]
pub struct Untar {
mut:
	reader     Reader // user implementation receiving the parsed blocks
	max_blocks int    // total blocks when read_all_blocks is used
	buffer     [512]u8 // data to parse block
	read       Read    // last read to send/receive to/from reader implementation

	state State // current parser state between blocks
	size  int   // remaining data size during state .data / .long_path

	long_path &LongPath = unsafe { nil } // not nil to hold a file long_name

	blank_block int = -1 // last no-data block with all-zeros
}

// State is the parser state carried between consecutive 512 byte blocks.
enum State {
	header    // expecting a header (or blank) block
	data      // expecting file data blocks
	long_path // expecting GNU long-name data blocks
}
|
||||
|
||||
// new_untar builds an Untar with a given Reader.
pub fn new_untar(reader Reader) &Untar {
	return &Untar{
		reader: reader
	}
}

// str returns a string representation with max_blocks and last read.
pub fn (u Untar) str() string {
	return 'max_blocks:${u.max_blocks} last_read:${u.read}'
}
|
||||
|
||||
// read_all_blocks parses the data blocks of any decompressed *.tar.gz array.
// The data blocks length must be divisible by 512.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
	if blocks.len % 512 != 0 {
		return error('data_blocks size is not a multiple of 512')
	}
	u.max_blocks = blocks.len / 512
	mut offset := 0
	for offset < blocks.len {
		res := u.read_single_block(blocks[offset..offset + 512])!
		if res != .continue {
			return res
		}
		offset += 512
	}
	return .end_of_file
}
|
||||
|
||||
// read_single_block parses one data block at a time.
// The data block length must be 512. Two consecutive all-zeros blocks
// return a .end_archive result.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
	if block.len != 512 {
		return error('data_block size is not 512')
	}
	u.read.block_number++ // 1,2,3...

	// copy the block into the reusable buffer, detecting an all-zeros block
	mut is_blank_block := true
	for i in 0 .. 512 {
		u.buffer[i] = block[i]
		if block[i] != 0 {
			is_blank_block = false
		}
	}
	match u.state {
		.header {
			if is_blank_block {
				// current non-data block is a blank block
				prev_block := u.read.block_number - 1
				result := if u.blank_block == prev_block {
					// two consecutive blank blocks
					u.read.special = .blank_2
					ReadResult.end_archive
				} else {
					// first blank block
					u.read.special = .blank_1
					ReadResult.continue
				}
				u.read.path_len = 0
				u.reader.other_block(mut u.read, '${result}')
				u.blank_block = u.read.block_number
				return result
			}
			u.read_header()!
		}
		.data {
			u.read_data()
		}
		.long_path {
			u.read_long_path()
		}
	}
	return if u.read.stop_early {
		.stop_early
	} else {
		.continue
	}
}
|
||||
|
||||
// read_header parses a 512 byte header block and dispatches to the reader
// according to the typeflag byte at offset 156.
fn (mut u Untar) read_header() ! {
	u.size = int(u.extract_octal(124, 12)) // `size` field: offset 124, 12 octal digits
	header := u.buffer[156] // pos 0x9c
	block_header := BlockHeader.from(header) or {
		// unknown typeflag: report it and continue with the next block
		u.read.special = .unknown
		u.read.path_len = 0
		u.reader.other_block(mut u.read, 'size:${u.size}')
		return
	}
	match block_header {
		.dir {
			if !u.checksum_ok() {
				return error('Checksum error: directory reading:${u.read}')
			}
			u.read.special = .no
			u.read.set_short_path(u.buffer, false)
			u.reader.dir_block(mut u.read, u64(u.size))
			// u.state = .header
		}
		.file {
			if !u.checksum_ok() {
				return error('Checksum error file reading:${u.read}')
			}
			u.read.special = .no
			if u.long_path != unsafe { nil } {
				// a previous `L` block carries this file's real (long) name
				u.read.set_long_path(u.long_path)
				if u.size > 0 {
					u.state = .data
				}
			} else {
				u.read.set_short_path(u.buffer, true)
				if u.size > 0 {
					u.state = .data
				}
			}
			u.reader.file_block(mut u.read, u64(u.size))
		}
		.long_name {
			// the following data blocks carry a long file name, not content
			u.read.special = .long_name
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .long_path
				u.long_path = new_long_path(u.size)
			}
		}
		.hard_link, .sym_link, .char_dev, .block_dev, .fifo {
			u.read.special = .ignore
			u.reader.other_block(mut u.read, block_header.str())
		}
		.global {
			u.read.special = .global
			u.read.set_short_path(u.buffer, false)
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .data
			}
		}
	}
}
|
||||
|
||||
// read_data calls Reader.data_block for the implementor to collect data parts as file content
fn (mut u Untar) read_data() {
	if u.size > 0 {
		part := if u.size > 512 { 512 } else { u.size } // bytes used in this block
		u.size -= 512
		pending := if u.size > 0 { u.size } else { 0 } // bytes still to come
		data_part := u.buffer[0..part]
		u.reader.data_block(mut u.read, data_part, pending)
	}
	if u.size <= 0 {
		// file complete: drop any long path and return to header parsing
		u.long_path = unsafe { nil }
		u.read.long_path = unsafe { nil } // real clear
		u.state = .header
	}
}
|
||||
|
||||
// read_long_path collects data blocks that carry a GNU long file name (`L`).
fn (mut u Untar) read_long_path() {
	if u.size > 0 {
		part := if u.size > 512 { 512 } else { u.size } // bytes used in this block
		u.size -= 512
		data_part := u.buffer[0..part]
		if u.long_path != unsafe { nil } {
			// this long path field collects the data parts as file long name
			u.long_path.append(data_part)
			u.reader.other_block(mut u.read, 'data_part:${data_part.len}')
		}
	}
	if u.size <= 0 {
		u.state = .header
	}
}
|
||||
|
||||
// extract_path returns the block path for directories and files: the NUL
// terminated C string at the start of the header buffer.
fn (mut u Untar) extract_path() string {
	mut name := []u8{}
	for letter in u.buffer {
		if letter == 0 {
			break
		}
		name << letter
	}
	return name.bytestr()
}
|
||||
|
||||
// checksum_ok verifies the validity for dir and files blocks.
// Per the ustar format, the stored checksum is the sum of all 512 header
// bytes with the 8 checksum bytes (offsets 148..155) counted as spaces (0x20).
fn (mut u Untar) checksum_ok() bool {
	mut v := u64(0)
	for n := 0; n < 512; n++ {
		if n < 148 || n > 155 {
			v += u.buffer[n]
		} else {
			v += 0x20
		}
	}
	parse := u.extract_octal(148, 8)
	return v == parse
}
|
||||
|
||||
// extract_octal reads an octal number at block position `pos` with a given number of `digits`.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
	mut i := u64(0)
	mut p := pos
	mut n := digits
	// skip leading non-octal characters (padding spaces or NULs)
	for {
		if (u.buffer[p] < `0` || u.buffer[p] > `7`) && n > 0 {
			p++
			n--
		} else {
			break
		}
	}
	// accumulate consecutive octal digits
	for {
		if u.buffer[p] >= `0` && u.buffer[p] <= `7` && n > 0 {
			i *= 8
			i += u8(u.buffer[p] - `0`)
			p++
			n--
		} else {
			break
		}
	}
	return i
}
|
||||
|
||||
// LongPath holds a GNU long file name (type `L`) collected across data blocks.
@[heap]
struct LongPath {
mut:
	name     []u8 // fixed-size buffer holding the long name bytes
	last_pos int  // next write position in name
}

// new_long_path builds a LongPath with a fixed maximum name size
fn new_long_path(size int) &LongPath {
	return &LongPath{
		name: []u8{len: size}
	}
}
|
||||
|
||||
// append copies the data part into the name buffer at the next free position.
// A part that would overflow the buffer is silently ignored.
fn (mut l LongPath) append(data []u8) {
	if l.name.len >= l.last_pos + data.len {
		for i, d in data {
			l.name[l.last_pos + i] = d
		}
		l.last_pos += data.len
	}
}
|
||||
|
||||
// get_path returns the collected name as a string, treating the buffer as a
// NUL terminated C string.
fn (l LongPath) get_path() string {
	mut end := l.name.len
	for i, b in l.name {
		if b == 0 {
			end = i
			break
		}
	}
	return l.name[..end].bytestr()
}
|
Loading…
Add table
Add a link
Reference in a new issue