mirror of
https://github.com/vlang/v.git
synced 2025-09-13 22:42:26 +03:00
builtin: add a rune iterator method to strings, allowing for `for i, r in s.runes_iterator() {`
without first allocating an array for all the runes (#24769)
This commit is contained in:
parent
502f0e7e77
commit
194db24829
5 changed files with 91 additions and 20 deletions
|
@ -2979,3 +2979,40 @@ fn data_to_hex_string(data &u8, len int) string {
|
|||
hex[dst] = 0
|
||||
return tos(hex, dst)
|
||||
}
|
||||
|
||||
// RunesIterator holds the state needed to walk over the runes of a string,
// one rune per call to its `next` method.
pub struct RunesIterator {
|
||||
mut:
|
||||
s string // the string whose runes are being iterated
|
||||
i int // byte offset in `s`, at which the next rune starts
|
||||
}
|
||||
|
||||
// runes_iterator returns a RunesIterator positioned at the first byte of `s`.
// Use it as `for r in s.runes_iterator() {` (or `for i, r in s.runes_iterator() {`)
// wherever `for r in s.runes() {` would be used, to avoid the intermediate
// array of runes that `.runes()` has to allocate.
pub fn (s string) runes_iterator() RunesIterator {
	iterator := RunesIterator{
		s: s
		i: 0
	}
	return iterator
}
|
||||
|
||||
// next is the method that will be called for each iteration in `for r in s.runes_iterator() {`.
// It returns the rune that starts at the current byte offset and advances the iterator past it.
// It returns `none` once the offset has reached the end of the string, which ends the loop.
pub fn (mut ri RunesIterator) next() ?rune {
	if ri.i >= ri.s.len {
		return none
	}
	char_len := utf8_char_len(unsafe { ri.s.str[ri.i] })
	if char_len == 1 {
		// single byte rune: return the byte itself, without going through the decoder
		res := unsafe { ri.s.str[ri.i] }
		ri.i++
		return res
	}
	start := &u8(unsafe { &ri.s.str[ri.i] })
	// Clamp the number of bytes handed to the decoder to what is left in the string,
	// so that a truncated trailing sequence is never read past the end of `ri.s`.
	remaining := ri.s.len - ri.i
	len := if char_len <= remaining { char_len } else { remaining }
	// The offset is advanced by the announced length, even for a truncated sequence;
	// the next call will then see `ri.i >= ri.s.len` and return `none`.
	ri.i += char_len
	if char_len > 4 {
		// longer than any sequence the decoder handles; yield a 0 rune instead
		return 0
	}
	return rune(impl_utf8_to_utf32(start, len))
}
|
||||
|
|
32
vlib/builtin/string_iterator_test.v
Normal file
32
vlib/builtin/string_iterator_test.v
Normal file
|
@ -0,0 +1,32 @@
|
|||
// check asserts that iterating over `s` with `s.runes_iterator()` produces exactly
// the same sequence of runes as `s.runes()`, printing both for easier diagnostics.
fn check(s string) {
	expected := s.runes()
	println('')
	println('> s: ${s}')
	println('> s.len: ${s.len:-4}')
	println('> srunes.len: ${expected.len:-4}')
	mut collected := []rune{}
	for current in s.runes_iterator() {
		collected << current
	}
	println('> srunes: ${expected}')
	println('> iterated: ${collected}')
	assert expected == collected
}
|
||||
|
||||
// test_ascii covers a string made only of single byte (ASCII) letters and digits.
fn test_ascii() {
	ascii_only := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
	check(ascii_only)
}
|
||||
|
||||
// test_mixed covers a string that mixes ASCII with multi-byte runes.
fn test_mixed() {
	mixed := 'abc,あいうえお,привет,❄☕❀💰'
	check(mixed)
}
|
||||
|
||||
// test_emoji_and_for_i_r_in_iterator runs the common check on a string of symbols/emoji,
// then verifies the two variable form `for i, r in s.runes_iterator() {`:
// the rune seen at iteration `i` must equal the rune at index `i` of `s.runes()`.
fn test_emoji_and_for_i_r_in_iterator() {
	symbols := '❄☕❀💰'
	check(symbols)
	expected := symbols.runes()
	for idx, current in symbols.runes_iterator() {
		eprintln('> i: ${idx} | r: ${current}')
		assert expected[idx] == current
	}
}
|
|
@ -78,34 +78,36 @@ pub fn utf32_decode_to_buffer(code u32, mut buf &u8) int {
|
|||
// it is used in vlib/builtin/string.v,
|
||||
// and also in vlib/v/gen/c/cgen.v
|
||||
// utf32_code decodes the UTF-8 bytes stored in the string `_rune` to a single UTF-32 value.
// It returns 0, when the string is longer than 4 bytes.
// The bytes of the string are decoded in place, without first copying them to a new array.
pub fn (_rune string) utf32_code() int {
	if _rune.len > 4 {
		return 0
	}
	return int(impl_utf8_to_utf32(&u8(_rune.str), _rune.len))
}
|
||||
|
||||
// utf8_to_utf32 converts an array of UTF-8 bytes to a single UTF-32 value.
// An empty array yields 0, and a single byte is returned unchanged.
// It returns an error, when more than 4 bytes are submitted.
@[direct_array_access]
pub fn (_bytes []u8) utf8_to_utf32() !rune {
	n := _bytes.len
	if n > 4 {
		return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum')
	}
	if n == 0 {
		return 0
	}
	if n == 1 {
		// return ASCII unchanged
		return rune(_bytes[0])
	}
	return impl_utf8_to_utf32(&u8(_bytes.data), n)
}
|
||||
|
||||
mut b := u8(int(_bytes[0]))
|
||||
|
||||
b = b << _bytes.len
|
||||
@[direct_array_access]
|
||||
fn impl_utf8_to_utf32(_bytes &u8, _bytes_len int) rune {
|
||||
if _bytes_len == 0 {
|
||||
return 0
|
||||
}
|
||||
// return ASCII unchanged
|
||||
if _bytes_len == 1 {
|
||||
return unsafe { rune(_bytes[0]) }
|
||||
}
|
||||
mut b := u8(int(unsafe { _bytes[0] }))
|
||||
b = b << _bytes_len
|
||||
mut res := rune(b)
|
||||
mut shift := 6 - _bytes.len
|
||||
for i := 1; i < _bytes.len; i++ {
|
||||
c := rune(_bytes[i])
|
||||
mut shift := 6 - _bytes_len
|
||||
for i := 1; i < _bytes_len; i++ {
|
||||
c := rune(unsafe { _bytes[i] })
|
||||
res = rune(res) << shift
|
||||
res |= c & 63 // 0x3f
|
||||
shift = 6
|
||||
|
|
|
@ -5,5 +5,5 @@ _result_ok(&(string[]) { s }, (_result*)(&_t2), sizeof(string));
|
|||
} else {
|
||||
return (_result_string){ .is_error=true, .err=_v_error(_S("empty")), .data={E_STRUCT} };
|
||||
}
|
||||
return _t1;
|
||||
return _t2;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue