mirror of
https://github.com/vlang/v.git
synced 2025-09-16 07:52:32 +03:00
encoding.utf8: add more tests for UTF-8 strings (#24544)
This commit is contained in:
parent
a5c8b4f94a
commit
31c6db51ba
2 changed files with 91 additions and 0 deletions
|
@ -48,6 +48,19 @@ fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
/* Check UTF-8 byte sequences according to the Unicode Standard (Table 3-7, Well-Formed UTF-8 Byte Sequences):
|
||||
* https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/
|
||||
* Code Points 1st 2nd 3rd 4th
|
||||
* U+0000..U+007F 00..7F
|
||||
* U+0080..U+07FF C2..DF 80..BF
|
||||
* U+0800..U+0FFF E0 A0..BF 80..BF
|
||||
* U+1000..U+CFFF E1..EC 80..BF 80..BF
|
||||
* U+D000..U+D7FF ED 80..9F 80..BF
|
||||
* U+E000..U+FFFF EE..EF 80..BF 80..BF
|
||||
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
||||
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
||||
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||
*/
|
||||
fn (mut s Utf8State) next_state(c u8) {
|
||||
// sequence 1
|
||||
if s.index == 0 {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue