compress,coroutines,encoding: handle C calls in .v files (part of enabling -W impure-v as default) (#19769)

This commit is contained in:
JalonSolov 2023-11-05 10:38:03 -05:00 committed by GitHub
parent 9fa1f8e275
commit 9ec8807dbe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 51 deletions

View file

@ -21,7 +21,7 @@ fn test_long_encoding() {
// //
encoded_size := base64.encode_in_buffer(s_original, ebuffer) encoded_size := base64.encode_in_buffer(s_original, ebuffer)
mut encoded_in_buf := []u8{len: encoded_size} mut encoded_in_buf := []u8{len: encoded_size}
unsafe { C.memcpy(encoded_in_buf.data, ebuffer, encoded_size) } unsafe { vmemcpy(encoded_in_buf.data, ebuffer, encoded_size) }
assert input_size * 4 / 3 == encoded_size assert input_size * 4 / 3 == encoded_size
assert encoded_in_buf[0] == `Y` assert encoded_in_buf[0] == `Y`
assert encoded_in_buf[1] == `W` assert encoded_in_buf[1] == `W`
@ -38,7 +38,7 @@ fn test_long_encoding() {
decoded_size := base64.decode_in_buffer(s_encoded, dbuffer) decoded_size := base64.decode_in_buffer(s_encoded, dbuffer)
assert decoded_size == input_size assert decoded_size == input_size
mut decoded_in_buf := []u8{len: decoded_size} mut decoded_in_buf := []u8{len: decoded_size}
unsafe { C.memcpy(decoded_in_buf.data, dbuffer, decoded_size) } unsafe { vmemcpy(decoded_in_buf.data, dbuffer, decoded_size) }
assert decoded_in_buf == s_original assert decoded_in_buf == s_original
mut s := 0 mut s := 0

View file

@ -1,17 +1,11 @@
/* // utf-8 utility string functions
utf-8 util //
// Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
Copyright (c) 2019-2023 Dario Deledda. All rights reserved. // Use of this source code is governed by an MIT license
Use of this source code is governed by an MIT license // that can be found in the LICENSE file.
that can be found in the LICENSE file.
This file contains utilities for utf8 strings
*/
module utf8 module utf8
/* // Utility functions
Utility functions
*/
// len return the length as number of unicode chars from a string // len return the length as number of unicode chars from a string
pub fn len(s string) int { pub fn len(s string) int {
@ -111,30 +105,24 @@ pub fn reverse(s string) string {
return str_array.join('') return str_array.join('')
} }
/* // Conversion functions
Conversion functions
*/
// to_upper return an uppercase string from a string // to_upper return an uppercase string from a string
pub fn to_upper(s string) string { pub fn to_upper(s string) string {
return up_low(s, true) return convert_case(s, true)
} }
// to_lower return an lowercase string from a string // to_lower return an lowercase string from a string
pub fn to_lower(s string) string { pub fn to_lower(s string) string {
return up_low(s, false) return convert_case(s, false)
} }
/* // Punctuation functions
Punctuation functions
The "western" function search on a small table, that is quicker than
the global unicode table search. **Use only for western chars**.
*/
// //
// The "western" function search on a small table, that is quicker than
// the global unicode table search. **Use only for western chars**.
// Western // Western
//
// is_punct return true if the string[index] byte is the start of a unicode western punctuation // is_punct return true if the string[index] byte is the start of a unicode western punctuation
pub fn is_punct(s string, index int) bool { pub fn is_punct(s string, index int) bool {
@ -191,9 +179,7 @@ pub fn is_uchar_punct(uchar int) bool {
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0 return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0
} }
//
// Global // Global
//
// is_global_punct return true if the string[index] byte of is the start of a global unicode punctuation // is_global_punct return true if the string[index] byte of is the start of a global unicode punctuation
pub fn is_global_punct(s string, index int) bool { pub fn is_global_punct(s string, index int) bool {
@ -205,11 +191,9 @@ pub fn is_uchar_global_punct(uchar int) bool {
return find_punct_in_table(uchar, utf8.unicode_punct) != 0 return find_punct_in_table(uchar, utf8.unicode_punct) != 0
} }
/* // Private functions
Private functions
*/
// Raw to_lower utf-8 function // utf8_to_lower raw utf-8 to_lower function
fn utf8_to_lower(in_cp int) int { fn utf8_to_lower(in_cp int) int {
mut cp := in_cp mut cp := in_cp
if (0x0041 <= cp && 0x005a >= cp) || (0x00c0 <= cp && 0x00d6 >= cp) if (0x0041 <= cp && 0x005a >= cp) || (0x00c0 <= cp && 0x00d6 >= cp)
@ -311,7 +295,7 @@ fn utf8_to_lower(in_cp int) int {
return cp return cp
} }
// Raw to_upper utf-8 function // utf8_to_upper raw utf-8 to_upper function
fn utf8_to_upper(in_cp int) int { fn utf8_to_upper(in_cp int) int {
mut cp := in_cp mut cp := in_cp
if (0x0061 <= cp && 0x007a >= cp) || (0x00e0 <= cp && 0x00f6 >= cp) if (0x0061 <= cp && 0x007a >= cp) || (0x00e0 <= cp && 0x00f6 >= cp)
@ -413,12 +397,12 @@ fn utf8_to_upper(in_cp int) int {
return cp return cp
} }
// convert_case converts letter cases
// //
// if upper_flag == true then make low ==> upper conversion // if upper_flag == true then convert lowercase ==> uppercase
// if upper_flag == false then make upper ==> low conversion // if upper_flag == false then convert uppercase ==> lowercase
// [direct_array_access]
// up_low make the dirt job fn convert_case(s string, upper_flag bool) string {
fn up_low(s string, upper_flag bool) string {
mut index := 0 mut index := 0
mut tab_char := 0 mut tab_char := 0
mut str_res := unsafe { malloc_noscan(s.len + 1) } mut str_res := unsafe { malloc_noscan(s.len + 1) }
@ -429,11 +413,15 @@ fn up_low(s string, upper_flag bool) string {
if ch_len == 1 { if ch_len == 1 {
if upper_flag == true { if upper_flag == true {
unsafe { unsafe {
str_res[index] = u8(C.toupper(s.str[index])) // Subtract 0x20 from ASCII lowercase to convert to uppercase.
c := s[index]
str_res[index] = if c >= 0x61 && c <= 0x7a { c & 0xdf } else { c }
} }
} else { } else {
unsafe { unsafe {
str_res[index] = u8(C.tolower(s.str[index])) // Add 0x20 to ASCII uppercase to convert to lowercase.
c := s[index]
str_res[index] = if c >= 0x41 && c <= 0x5a { c | 0x20 } else { c }
} }
} }
} else if ch_len > 1 && ch_len < 5 { } else if ch_len > 1 && ch_len < 5 {
@ -529,16 +517,14 @@ fn up_low(s string, upper_flag bool) string {
// for c compatibility set the ending 0 // for c compatibility set the ending 0
unsafe { unsafe {
str_res[index] = 0 str_res[index] = 0
// C.printf("str_res: %s\n--------------\n",str_res)
return tos(str_res, s.len) return tos(str_res, s.len)
} }
} }
// find punct in lockup table // find_punct_in_table looks for valid punctuation in table
[direct_array_access]
fn find_punct_in_table(in_code int, in_table []int) int { fn find_punct_in_table(in_code int, in_table []int) int {
// // uses simple binary search
// We will use a simple binary search
//
mut first_index := 0 mut first_index := 0
mut last_index := (in_table.len) mut last_index := (in_table.len)
@ -562,15 +548,14 @@ fn find_punct_in_table(in_code int, in_table []int) int {
break break
} }
} }
// C.printf("not found.\n")
return 0 return 0
} }
/* // Unicode punctuation chars
Unicode punctuation chars //
// source: http://www.unicode.org/faq/punctuation_symbols.html
source: http://www.unicode.org/faq/punctuation_symbols.html
*/
const ( const (
// Western punctuation mark // Western punctuation mark
// Character Name Browser Image // Character Name Browser Image