mirror of
https://github.com/vlang/v.git
synced 2025-09-13 14:32:26 +03:00
encoding.iconv: fix iconv type cstrict, add support for LOCAL
encoding (#22398)
This commit is contained in:
parent
7b8e059a3c
commit
e2425842b2
3 changed files with 51 additions and 19 deletions
|
@ -16,21 +16,35 @@ fn reverse_u32(src u32) u32 {
|
|||
// vstring_to_encoding converts the V string `str` to a byte string in the `tocode` encoding
|
||||
// tip: use `iconv --list` to check for supported encodings
|
||||
pub fn vstring_to_encoding(str string, tocode string) ![]u8 {
|
||||
encoding_name := tocode.to_upper()
|
||||
mut encoding_name := tocode.to_upper()
|
||||
if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
|
||||
return error('please use UTF16-LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
|
||||
}
|
||||
return conv(tocode, 'UTF-8', str.str, str.len)
|
||||
if encoding_name == 'LOCAL' {
|
||||
$if windows {
|
||||
encoding_name = 'ANSI'
|
||||
} $else {
|
||||
encoding_name = 'UTF-8'
|
||||
}
|
||||
}
|
||||
return conv(encoding_name, 'UTF-8', str.str, str.len)
|
||||
}
|
||||
|
||||
// encoding_to_vstring converts the given `bytes` using `fromcode` encoding, to a V string (encoded with UTF-8)
|
||||
// tip: use `iconv --list` to check for supported encodings
|
||||
pub fn encoding_to_vstring(bytes []u8, fromcode string) !string {
|
||||
encoding_name := fromcode.to_upper()
|
||||
mut encoding_name := fromcode.to_upper()
|
||||
if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
|
||||
return error('please use UTF16-LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
|
||||
}
|
||||
mut dst := conv('UTF-8', fromcode, bytes.data, bytes.len)!
|
||||
if encoding_name == 'LOCAL' {
|
||||
$if windows {
|
||||
encoding_name = 'ANSI'
|
||||
} $else {
|
||||
encoding_name = 'UTF-8'
|
||||
}
|
||||
}
|
||||
mut dst := conv('UTF-8', encoding_name, bytes.data, bytes.len)!
|
||||
dst << 0 // add a tail zero, to build a vstring
|
||||
return unsafe { cstring_to_vstring(dst.data) }
|
||||
}
|
||||
|
@ -43,7 +57,15 @@ pub fn encoding_to_vstring(bytes []u8, fromcode string) !string {
|
|||
// for utf32be, it will prepend 0x0000FEFF to the `src`
|
||||
pub fn create_utf_string_with_bom(src []u8, utf_type string) []u8 {
|
||||
mut clone := src.clone()
|
||||
match utf_type.to_upper() {
|
||||
mut encoding_name := utf_type.to_upper()
|
||||
if encoding_name == 'LOCAL' {
|
||||
$if windows {
|
||||
encoding_name = 'ANSI'
|
||||
} $else {
|
||||
encoding_name = 'UTF-8'
|
||||
}
|
||||
}
|
||||
match encoding_name {
|
||||
'UTF8', 'UTF-8' {
|
||||
clone.prepend([u8(0xEF), 0xBB, 0xBF])
|
||||
}
|
||||
|
@ -73,7 +95,15 @@ pub fn create_utf_string_with_bom(src []u8, utf_type string) []u8 {
|
|||
@[direct_array_access]
|
||||
pub fn remove_utf_string_with_bom(src []u8, utf_type string) []u8 {
|
||||
mut clone := src.clone()
|
||||
match utf_type.to_upper() {
|
||||
mut encoding_name := utf_type.to_upper()
|
||||
if encoding_name == 'LOCAL' {
|
||||
$if windows {
|
||||
encoding_name = 'ANSI'
|
||||
} $else {
|
||||
encoding_name = 'UTF-8'
|
||||
}
|
||||
}
|
||||
match encoding_name {
|
||||
'UTF8', 'UTF-8' {
|
||||
if clone.len > 3 {
|
||||
if clone[0] == u8(0xEF) && clone[1] == u8(0xBB) && clone[2] == u8(0xBF) {
|
||||
|
@ -119,10 +149,6 @@ pub fn remove_utf_string_with_bom(src []u8, utf_type string) []u8 {
|
|||
// write_file_encoding converts `text` into `encoding` and writes it to the file at the given `path`. If `path` already exists, it will be overwritten.
|
||||
// For `encoding` in UTF8/UTF16/UTF32, if `bom` is true, then a BOM header will be written to the file.
|
||||
pub fn write_file_encoding(path string, text string, encoding string, bom bool) ! {
|
||||
encoding_name := encoding.to_upper()
|
||||
if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
|
||||
return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
|
||||
}
|
||||
encoding_bytes := vstring_to_encoding(text, encoding)!
|
||||
if bom && encoding.to_upper().starts_with('UTF') {
|
||||
encoding_bom_bytes := create_utf_string_with_bom(encoding_bytes, encoding)
|
||||
|
@ -134,10 +160,6 @@ pub fn write_file_encoding(path string, text string, encoding string, bom bool)
|
|||
|
||||
// read_file_encoding reads the file in `path` with `encoding` and returns the contents
|
||||
pub fn read_file_encoding(path string, encoding string) !string {
|
||||
encoding_name := encoding.to_upper()
|
||||
if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
|
||||
return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
|
||||
}
|
||||
encoding_bytes := os.read_file_array[u8](path)
|
||||
encoding_without_bom_bytes := remove_utf_string_with_bom(encoding_bytes, encoding)
|
||||
return encoding_to_vstring(encoding_without_bom_bytes, encoding)!
|
||||
|
|
|
@ -5,9 +5,9 @@ module iconv
|
|||
#include <iconv.h>
|
||||
#flag darwin -liconv
|
||||
|
||||
fn C.iconv_open(tocode &u8, fromcode &u8) voidptr
|
||||
fn C.iconv_open(tocode charptr, fromcode charptr) voidptr
|
||||
fn C.iconv_close(cd voidptr) int
|
||||
fn C.iconv(cd voidptr, inbuf &&u8, inbytesleft &usize, outbuf &&u8, outbytesleft &usize) usize
|
||||
fn C.iconv(cd voidptr, inbuf &charptr, inbytesleft &usize, outbuf &charptr, outbytesleft &usize) usize
|
||||
|
||||
// conv converts a string in the `fromcode` encoding to a string in the `tocode` encoding
|
||||
@[direct_array_access]
|
||||
|
@ -35,7 +35,7 @@ fn conv(tocode string, fromcode string, src &u8, src_len int) ![]u8 {
|
|||
else {}
|
||||
}
|
||||
|
||||
mut cd := C.iconv_open(dst_encoding.str, src_encoding.str)
|
||||
mut cd := C.iconv_open(charptr(dst_encoding.str), charptr(src_encoding.str))
|
||||
if isize(cd) == -1 {
|
||||
return error('platform can\'t convert from ${src_encoding} to ${dst_encoding}')
|
||||
}
|
||||
|
@ -43,8 +43,8 @@ fn conv(tocode string, fromcode string, src &u8, src_len int) ![]u8 {
|
|||
|
||||
mut dst := []u8{len: (src_len + 1) * 4} // this should be enough to hold the dst encoding string
|
||||
|
||||
mut src_ptr := &u8(src)
|
||||
mut dst_ptr := &u8(dst.data)
|
||||
mut src_ptr := charptr(src)
|
||||
mut dst_ptr := charptr(dst.data)
|
||||
mut src_left := usize(src_len)
|
||||
mut dst_left := usize(dst.len)
|
||||
res := C.iconv(cd, &src_ptr, &src_left, &dst_ptr, &dst_left)
|
||||
|
|
|
@ -20,6 +20,11 @@ fn test_vstring_to_encoding() {
|
|||
abc_utf32be := iconv.vstring_to_encoding('abc', 'UTF-32BE')!
|
||||
assert abc_utf32be == [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
|
||||
|
||||
abc_local := iconv.vstring_to_encoding('abc', 'LOCAL')!
|
||||
// Windows LOCAL: ANSI encoding
|
||||
// Linux LOCAL: UTF-8 encoding
|
||||
assert abc_local == [u8(97), 98, 99]
|
||||
|
||||
if abc_not_exist := iconv.vstring_to_encoding('abc', 'encoding_not_exist') {
|
||||
assert false, 'encoding_not_exist'
|
||||
}
|
||||
|
@ -53,6 +58,11 @@ fn test_encoding_to_vstring() {
|
|||
'UTF-32BE')!
|
||||
assert abc_utf32be == 'abc'
|
||||
|
||||
abc_local := iconv.encoding_to_vstring([u8(97), 98, 99], 'LOCAL')!
|
||||
// Windows LOCAL: ANSI encoding
|
||||
// Linux LOCAL: UTF-8 encoding
|
||||
assert abc_local == 'abc'
|
||||
|
||||
if abc_not_exist := iconv.encoding_to_vstring([u8(97), 98, 99], 'encoding_not_exist') {
|
||||
assert false, 'encoding_not_exist'
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue