mirror of
https://github.com/vlang/v.git
synced 2025-09-13 14:32:26 +03:00
builtin: add a rune iterator method to strings, allowing for `for i, r in s.runes_iterator() {`,
without first allocating an array for all the runes (#24769)
This commit is contained in:
parent
502f0e7e77
commit
194db24829
5 changed files with 91 additions and 20 deletions
|
@ -74,9 +74,9 @@ fn __print_assert_failure(i &VAssertMetaInfo) {
|
||||||
eprintln('${i.fpath}:${i.line_nr + 1}: FAIL: fn ${i.fn_name}: assert ${i.src}')
|
eprintln('${i.fpath}:${i.line_nr + 1}: FAIL: fn ${i.fn_name}: assert ${i.src}')
|
||||||
if i.op.len > 0 && i.op != 'call' {
|
if i.op.len > 0 && i.op != 'call' {
|
||||||
if i.llabel == i.lvalue {
|
if i.llabel == i.lvalue {
|
||||||
eprintln(' left value: ${i.llabel}')
|
eprintln(' left value: ${i.llabel}')
|
||||||
} else {
|
} else {
|
||||||
eprintln(' left value: ${i.llabel} = ${i.lvalue}')
|
eprintln(' left value: ${i.llabel} = ${i.lvalue}')
|
||||||
}
|
}
|
||||||
if i.rlabel == i.rvalue {
|
if i.rlabel == i.rvalue {
|
||||||
eprintln(' right value: ${i.rlabel}')
|
eprintln(' right value: ${i.rlabel}')
|
||||||
|
|
|
@ -2979,3 +2979,40 @@ fn data_to_hex_string(data &u8, len int) string {
|
||||||
hex[dst] = 0
|
hex[dst] = 0
|
||||||
return tos(hex, dst)
|
return tos(hex, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RunesIterator keeps the state needed to walk over the runes of a string,
// one rune per call to its `next()` method.
pub struct RunesIterator {
mut:
	s string // the string whose runes are being iterated
	i int    // byte offset in `s`, where the next rune starts
}
|
||||||
|
|
||||||
|
// runes_iterator returns a RunesIterator, positioned at the start of the string `s`.
// Use it as `for r in s.runes_iterator() {`, wherever `for r in s.runes() {` would
// otherwise be written; unlike `.runes()`, it does not need to allocate an
// intermediate array for all the runes first.
pub fn (s string) runes_iterator() RunesIterator {
	// the `i` field is left at its zero value, so the iteration starts at byte offset 0
	iter := RunesIterator{
		s: s
	}
	return iter
}
|
||||||
|
|
||||||
|
// next is the method that will be called for each iteration in `for r in s.runes_iterator() {`.
// It decodes the rune that starts at the current byte offset, moves the offset past it,
// and returns the decoded rune. It returns `none`, once the offset reaches the end of the string.
pub fn (mut ri RunesIterator) next() ?rune {
	if ri.i >= ri.s.len {
		return none
	}
	// read the leading byte once; it determines the length of the whole sequence
	first := unsafe { ri.s.str[ri.i] }
	char_len := utf8_char_len(first)
	if char_len == 1 {
		// a sequence of a single byte: the byte itself is the result
		ri.i++
		return first
	}
	start := &u8(unsafe { &ri.s.str[ri.i] })
	// Clamp the number of bytes given to the decoder to what is still left in the string,
	// so that a sequence cut short at the end of `s` is not read past the end.
	remaining := ri.s.len - ri.i
	len := if remaining < char_len { remaining } else { char_len }
	// The offset always advances by the declared length; if that overshoots the end,
	// the check at the top of the next call ends the iteration.
	ri.i += char_len
	if char_len > 4 {
		// same convention as `utf32_code`: sequences longer than 4 bytes decode to 0
		return 0
	}
	return rune(impl_utf8_to_utf32(start, len))
}
|
||||||
|
|
32
vlib/builtin/string_iterator_test.v
Normal file
32
vlib/builtin/string_iterator_test.v
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
// check asserts that iterating over `s` with `s.runes_iterator()` produces the same
// runes, in the same order, as `s.runes()`. Both sequences are printed as diagnostics.
fn check(s string) {
	srunes := s.runes()
	// Collect what the iterator yields first; the loop prints nothing, so doing it
	// before the diagnostics below does not change the output.
	mut itera_ := []rune{}
	for r in s.runes_iterator() {
		itera_ << r
	}
	println('')
	println('> s: ${s}')
	println('> s.len: ${s.len:-4}')
	println('> srunes.len: ${srunes.len:-4}')
	println('> srunes: ${srunes}')
	println('> iterated: ${itera_}')
	assert srunes == itera_
}
|
||||||
|
|
||||||
|
// test_ascii runs `check` on a string made of ASCII letters and digits only.
fn test_ascii() {
	ascii := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
	check(ascii)
}
|
||||||
|
|
||||||
|
// test_mixed runs `check` on a string that mixes ASCII, kana, Cyrillic, symbols and an emoji.
fn test_mixed() {
	mixed := 'abc,あいうえお,привет,❄☕❀💰'
	check(mixed)
}
|
||||||
|
|
||||||
|
// test_emoji_and_for_i_r_in_iterator runs `check` on a string of symbols and an emoji,
// then verifies that the two variable form `for i, r in s.runes_iterator() {` yields,
// for each index `i`, the same rune that `s.runes()` has at that index.
fn test_emoji_and_for_i_r_in_iterator() {
	s := '❄☕❀💰'
	check(s)
	expected := s.runes()
	for idx, current in s.runes_iterator() {
		eprintln('> i: ${idx} | r: ${current}')
		assert expected[idx] == current
	}
}
|
|
@ -78,34 +78,36 @@ pub fn utf32_decode_to_buffer(code u32, mut buf &u8) int {
|
||||||
// it is used in vlib/builtin/string.v,
|
// it is used in vlib/builtin/string.v,
|
||||||
// and also in vlib/v/gen/c/cgen.v
|
// and also in vlib/v/gen/c/cgen.v
|
||||||
pub fn (_rune string) utf32_code() int {
|
pub fn (_rune string) utf32_code() int {
|
||||||
if res := _rune.bytes().utf8_to_utf32() {
|
if _rune.len > 4 {
|
||||||
return int(res)
|
return 0
|
||||||
}
|
}
|
||||||
return 0
|
return int(impl_utf8_to_utf32(&u8(_rune.str), _rune.len))
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert array of utf8 bytes to single utf32 value
|
// convert array of utf8 bytes to single utf32 value
|
||||||
// will error if more than 4 bytes are submitted
|
// will error if more than 4 bytes are submitted
|
||||||
@[direct_array_access]
|
|
||||||
pub fn (_bytes []u8) utf8_to_utf32() !rune {
|
pub fn (_bytes []u8) utf8_to_utf32() !rune {
|
||||||
if _bytes.len == 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
// return ASCII unchanged
|
|
||||||
if _bytes.len == 1 {
|
|
||||||
return rune(_bytes[0])
|
|
||||||
}
|
|
||||||
if _bytes.len > 4 {
|
if _bytes.len > 4 {
|
||||||
return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum')
|
return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum')
|
||||||
}
|
}
|
||||||
|
return impl_utf8_to_utf32(&u8(_bytes.data), _bytes.len)
|
||||||
|
}
|
||||||
|
|
||||||
mut b := u8(int(_bytes[0]))
|
@[direct_array_access]
|
||||||
|
fn impl_utf8_to_utf32(_bytes &u8, _bytes_len int) rune {
|
||||||
b = b << _bytes.len
|
if _bytes_len == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// return ASCII unchanged
|
||||||
|
if _bytes_len == 1 {
|
||||||
|
return unsafe { rune(_bytes[0]) }
|
||||||
|
}
|
||||||
|
mut b := u8(int(unsafe { _bytes[0] }))
|
||||||
|
b = b << _bytes_len
|
||||||
mut res := rune(b)
|
mut res := rune(b)
|
||||||
mut shift := 6 - _bytes.len
|
mut shift := 6 - _bytes_len
|
||||||
for i := 1; i < _bytes.len; i++ {
|
for i := 1; i < _bytes_len; i++ {
|
||||||
c := rune(_bytes[i])
|
c := rune(unsafe { _bytes[i] })
|
||||||
res = rune(res) << shift
|
res = rune(res) << shift
|
||||||
res |= c & 63 // 0x3f
|
res |= c & 63 // 0x3f
|
||||||
shift = 6
|
shift = 6
|
||||||
|
|
|
@ -5,5 +5,5 @@ _result_ok(&(string[]) { s }, (_result*)(&_t2), sizeof(string));
|
||||||
} else {
|
} else {
|
||||||
return (_result_string){ .is_error=true, .err=_v_error(_S("empty")), .data={E_STRUCT} };
|
return (_result_string){ .is_error=true, .err=_v_error(_S("empty")), .data={E_STRUCT} };
|
||||||
}
|
}
|
||||||
return _t1;
|
return _t2;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue