v/vlib/archive/tar/untar.v

290 lines
6.4 KiB
V

module tar
// Untar parses the contents of a unix tar file, one 512-byte block at a time,
// and reports what it finds to a Reader.
// It reuses a fixed array of 512 bytes to parse each TAR block.
@[heap]
pub struct Untar {
mut:
reader Reader // receives the dir/file/data/other block notifications
max_blocks int // number of 512-byte blocks given to the last read_all_blocks call
buffer [512]u8 // copy of the block currently being parsed
read Read // last read to send/receive to/from reader implementation
state State // how the next block is interpreted: header, data or long path
size int // remaining byte count while in the data or long_path state
long_path &LongPath = unsafe { nil } // not nil while holding a file long name
blank_block int = -1 // block number of the last all-zeros block seen in header state
}
// State selects how read_single_block interprets the next 512-byte block.
enum State {
header // the block is parsed by read_header (or handled as a blank block)
data // the block is passed to read_data
long_path // the block is passed to read_long_path
}
// new_untar allocates an Untar that reports every parsed block to `reader`.
// All other fields keep their declared defaults.
pub fn new_untar(reader Reader) &Untar {
	untar := &Untar{
		reader: reader
	}
	return untar
}
// str describes the untar as `max_blocks:<n> last_read:<read>`.
pub fn (u Untar) str() string {
	blocks_part := 'max_blocks:${u.max_blocks}'
	read_part := ' last_read:${u.read}'
	return blocks_part + read_part
}
// read_all_blocks feeds every 512-byte block of `blocks` (for example a
// decompressed *.tar.gz array) to read_single_block, in order.
// It fails when `blocks.len` is not a multiple of 512. Parsing stops at the
// first result that is not `.continue`, which is returned as is; when every
// block was consumed the result is `.end_of_file`.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
	if blocks.len % 512 != 0 {
		return error('data_blocks size is not a multiple of 512')
	}
	total := blocks.len / 512
	u.max_blocks = total
	for n in 0 .. total {
		start := n * 512
		outcome := u.read_single_block(blocks[start..start + 512])!
		if outcome == .continue {
			continue
		}
		return outcome
	}
	return .end_of_file
}
// read_single_block parses one block, which must be exactly 512 bytes long.
// The block is copied into the internal buffer and then handled according to
// the current state (header, data or long path).
// While expecting a header, an all-zeros block is reported to the reader as a
// blank block: the first one yields `.continue`, a second consecutive one
// yields `.end_archive`.
// Otherwise the result is `.stop_early` when the read is flagged to stop
// early, else `.continue`.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
	if block.len != 512 {
		return error('data_block size is not 512')
	}
	u.read.block_number++ // 1,2,3...

	// copy the block and remember whether any byte is set
	mut non_zero_seen := false
	for pos, b in block {
		u.buffer[pos] = b
		if b != 0 {
			non_zero_seen = true
		}
	}

	if u.state == .header && !non_zero_seen {
		// blank block where a header was expected
		mut outcome := ReadResult.continue
		if u.blank_block == u.read.block_number - 1 {
			// two consecutive blank blocks
			u.read.special = .blank_2
			outcome = ReadResult.end_archive
		} else {
			// first blank block
			u.read.special = .blank_1
		}
		u.read.path_len = 0
		u.reader.other_block(mut u.read, '${outcome}')
		u.blank_block = u.read.block_number
		return outcome
	}

	match u.state {
		.header { u.read_header()! }
		.data { u.read_data() }
		.long_path { u.read_long_path() }
	}
	if u.read.stop_early {
		return .stop_early
	}
	return .continue
}
// read_header parses the block held in `u.buffer` as a tar header and
// notifies the reader about it.
// The entry size is the octal field at offset 124 (12 bytes) and the entry
// type is the byte at offset 156.
// - unknown type: reported through other_block, nothing else happens.
// - dir/file: the checksum is verified first; a failure is returned as error.
//   A pending long name (collected from a preceding long_name entry) replaces
//   the short header name. Files with data switch the state to `.data`.
// - long_name: starts collecting `size` bytes of long name.
// - links/devices/fifo: reported through other_block and ignored.
// - global: reported through other_block; its data, if any, is read next.
// A pending long name belongs to the entry that follows it, so it is released
// as soon as that entry is reported and no data blocks follow. Without this a
// zero-size file, a directory or an ignored entry would leave the long name
// in place for the next file.
// An error is also returned when a recognized entry declares a size that does
// not fit in `int`, since the truncated value would make the following data
// blocks be parsed as headers.
fn (mut u Untar) read_header() ! {
	raw_size := u.extract_octal(124, 12)
	u.size = int(raw_size)
	header := u.buffer[156] // pos 0x9c
	block_header := BlockHeader.from(header) or {
		u.read.special = .unknown
		u.read.path_len = 0
		u.reader.other_block(mut u.read, 'size:${u.size}')
		return
	}
	if raw_size > u64(0x7fff_ffff) {
		return error('Size error: ${raw_size} does not fit in an int reading:${u.read}')
	}
	match block_header {
		.dir {
			if !u.checksum_ok() {
				return error('Checksum error: directory reading:${u.read}')
			}
			u.read.special = .no
			if u.long_path != unsafe { nil } {
				u.read.set_long_path(u.long_path)
			} else {
				u.read.set_short_path(u.buffer, false)
			}
			u.reader.dir_block(mut u.read, u64(u.size))
			// the state stays .header, so the long name must be dropped here
			u.release_long_path()
		}
		.file {
			if !u.checksum_ok() {
				return error('Checksum error file reading:${u.read}')
			}
			u.read.special = .no
			if u.long_path != unsafe { nil } {
				u.read.set_long_path(u.long_path)
			} else {
				u.read.set_short_path(u.buffer, true)
			}
			if u.size > 0 {
				u.state = .data
			}
			u.reader.file_block(mut u.read, u64(u.size))
			if u.size <= 0 {
				// no data block follows, so read_data never runs to drop the long name
				u.release_long_path()
			}
		}
		.long_name {
			u.read.special = .long_name
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .long_path
				u.long_path = new_long_path(u.size)
			}
		}
		.hard_link, .sym_link, .char_dev, .block_dev, .fifo {
			u.read.special = .ignore
			u.reader.other_block(mut u.read, block_header.str())
			// a long name preceding an ignored entry must not leak to the next file
			u.release_long_path()
		}
		.global {
			u.read.special = .global
			u.read.set_short_path(u.buffer, false)
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .data
			}
		}
	}
}

// release_long_path forgets the pending long name, both in the untar and in
// the read handed to the reader.
fn (mut u Untar) release_long_path() {
	u.long_path = unsafe { nil }
	u.read.long_path = unsafe { nil }
}
// read_data hands the current block to Reader.data_block so the implementor
// can collect it as part of the file content.
// At most 512 bytes are passed per call, together with the number of bytes
// still pending after this part. Once nothing is pending the long name is
// dropped and the next block is parsed as a header again.
fn (mut u Untar) read_data() {
	if u.size > 0 {
		chunk_len := if u.size < 512 { u.size } else { 512 }
		u.size -= 512
		mut remaining := u.size
		if remaining < 0 {
			remaining = 0
		}
		chunk := u.buffer[0..chunk_len]
		u.reader.data_block(mut u.read, chunk, remaining)
	}
	if u.size > 0 {
		return
	}
	u.long_path = unsafe { nil }
	u.read.long_path = unsafe { nil } // real clear
	u.state = .header
}
// read_long_path appends the current block (at most 512 bytes of it) to the
// long name being collected and reports the part length through other_block.
// Once all `size` bytes were consumed the next block is parsed as a header.
fn (mut u Untar) read_long_path() {
	if u.size <= 0 {
		u.state = .header
		return
	}
	chunk_len := if u.size < 512 { u.size } else { 512 }
	u.size -= 512
	chunk := u.buffer[0..chunk_len]
	if u.long_path != unsafe { nil } {
		// the long path collects the data parts as the file long name
		u.long_path.append(chunk)
		u.reader.other_block(mut u.read, 'data_part:${chunk.len}')
	}
	if u.size <= 0 {
		u.state = .header
	}
}
// extract_path returns the name stored in the header `name` field, which
// occupies the first 100 bytes of the block.
// The name ends at the first NUL byte, or at the end of the field when all
// 100 bytes are used: a name of exactly 100 bytes carries no terminator, so
// the scan must not continue into the fields that follow it.
fn (mut u Untar) extract_path() string {
	name_field_len := 100
	mut end := 0
	for end < name_field_len && u.buffer[end] != 0 {
		end++
	}
	return u.buffer[0..end].bytestr()
}
// checksum_ok tells whether the block checksum matches the one stored in the
// header. The sum covers all 512 bytes, with the 8 bytes of the checksum
// field itself (positions 148..155) counted as spaces (0x20). The stored
// value is the octal number at position 148.
fn (mut u Untar) checksum_ok() bool {
	mut total := u64(8 * 0x20) // the checksum field counted as 8 spaces
	for pos, b in u.buffer {
		if pos >= 148 && pos <= 155 {
			continue
		}
		total += b
	}
	return total == u.extract_octal(148, 8)
}
// extract_octal reads an octal number at block position `pos` from a field
// that is `digits` bytes long.
// Leading bytes that are not octal digits (padding) are skipped, then octal
// digits are accumulated until a non-digit or the end of the field.
// Returns 0 when the field holds no octal digit.
// The field boundary is checked before each byte is read, so the byte after
// the field is never touched, and a field reaching the end of the block
// cannot index past the buffer.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
	field_end := pos + digits
	end := if field_end > u.buffer.len { u.buffer.len } else { field_end }
	mut p := pos
	// skip the padding in front of the number
	for p < end && !(u.buffer[p] >= `0` && u.buffer[p] <= `7`) {
		p++
	}
	mut value := u64(0)
	for p < end && u.buffer[p] >= `0` && u.buffer[p] <= `7` {
		value *= 8
		value += u8(u.buffer[p] - `0`)
		p++
	}
	return value
}
// LongPath collects the bytes of a long file name, appended part by part.
@[heap]
struct LongPath {
mut:
name []u8 // name bytes; new_long_path allocates it with its full length
last_pos int // position in `name` where the next append starts writing
}
// new_long_path allocates a LongPath whose name holds exactly `size` bytes,
// which is the most that can later be appended to it.
fn new_long_path(size int) &LongPath {
	storage := []u8{len: size}
	return &LongPath{
		name: storage
	}
}
// append copies `data` into the name, right after the bytes appended so far.
// Data that does not fit in the remaining space is ignored as a whole.
fn (mut l LongPath) append(data []u8) {
	end_pos := l.last_pos + data.len
	if end_pos > l.name.len {
		return
	}
	for offset in 0 .. data.len {
		l.name[l.last_pos + offset] = data[offset]
	}
	l.last_pos = end_pos
}
// get_path returns the name as a string, reading it like a C string: the
// bytes up to, but not including, the first zero byte (or all of them when
// there is none).
fn (l LongPath) get_path() string {
	mut end := 0
	for end < l.name.len && l.name[end] != 0 {
		end++
	}
	return l.name[..end].bytestr()
}