utf8, readline: clarify UTF-8 vs UTF-32 usage (see also #22461) (#22558)

This commit is contained in:
Pepper Gray 2024-10-18 10:18:31 +02:00 committed by GitHub
parent decea1b188
commit ce8f62146b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 14 additions and 9 deletions

View file

@ -634,6 +634,10 @@ println(country[0]) // Output: 78
println(country[0].ascii_str()) // Output: N
```
If you want the code point from a specific `string` index or other more advanced
UTF-8 processing and conversions, refer to the
[vlib/encoding.utf8](https://modules.vlang.io/encoding.utf8.html) module.
Both single and double quotes can be used to denote strings. For consistency, `vfmt` converts double
quotes to single quotes unless the string contains a single quote character.
@ -783,7 +787,7 @@ and related modules [strings](https://modules.vlang.io/strings.html),
### Runes
A `rune` represents a single Unicode character and is an alias for `u32`.
A `rune` represents a single UTF-32 encoded Unicode character and is an alias for `u32`.
To denote them, use <code>`</code> (backticks) :
```v

View file

@ -3,6 +3,7 @@
// that can be found in the LICENSE file.
module builtin
// utf8_char_len returns the length in bytes of a UTF-8 encoded codepoint that starts with the byte `b`.
// The constant 0xe5000000 is used as a packed table of 2-bit entries indexed by the high nibble
// of `b`: `(b >> 3) & 0x1e` equals twice that nibble, i.e. the bit offset of its entry.
// The result is 1 for 0x00..0xbf, 2 for 0xc0..0xdf, 3 for 0xe0..0xef and 4 for 0xf0..0xff.
// Continuation bytes (0x80..0xbf) are not rejected; they yield 1 like ASCII bytes.
pub fn utf8_char_len(b u8) int {
return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1
}

View file

@ -73,7 +73,7 @@ pub fn get_rune(s string, index int) rune {
return res
}
// raw_index - get the raw character from the string by the given index value.
// raw_index - get the Unicode character at the given index of the UTF-8 string, returned as a UTF-8 string.
// example: utf8.raw_index('我是V Lang', 1) => '是'
pub fn raw_index(s string, index int) string {
mut r := []rune{}

View file

@ -25,7 +25,7 @@ struct Termios {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.
@ -63,7 +63,7 @@ pub fn (mut r Readline) read_line(prompt string) !string {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.

View file

@ -10,7 +10,7 @@ struct Termios {}
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.

View file

@ -101,7 +101,7 @@ pub fn (r Readline) read_char() !int {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.
@ -152,7 +152,7 @@ pub fn (mut r Readline) read_line(prompt string) !string {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.

View file

@ -20,7 +20,7 @@ struct Termios {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.
@ -56,7 +56,7 @@ pub fn (mut r Readline) read_line(prompt string) !string {
// read_line_utf8 blocks execution in a loop and awaits user input
// characters from a terminal until `EOF` or `Enter` key is encountered
// in the input stream.
// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or
// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or
// an error if the line is empty.
// The `prompt` `string` is output as a prefix text for the input capturing.
// read_line_utf8 is the main method of the `readline` module and `Readline` struct.