diff --git a/doc/docs.md b/doc/docs.md index c185815593..b98bfc3c22 100644 --- a/doc/docs.md +++ b/doc/docs.md @@ -634,6 +634,10 @@ println(country[0]) // Output: 78 println(country[0].ascii_str()) // Output: N ``` +If you want the code point from a specific `string` index or other more advanced +UTF-8 processing and conversions, refer to the +[vlib/encoding.utf8](https://modules.vlang.io/encoding.utf8.html) module. + Both single and double quotes can be used to denote strings. For consistency, `vfmt` converts double quotes to single quotes unless the string contains a single quote character. @@ -783,7 +787,7 @@ and related modules [strings](https://modules.vlang.io/strings.html), ### Runes -A `rune` represents a single Unicode character and is an alias for `u32`. +A `rune` represents a single UTF-32 encoded Unicode character and is an alias for `u32`. To denote them, use ` (backticks) : ```v diff --git a/vlib/builtin/utf8.v b/vlib/builtin/utf8.v index 01328f0ef1..5755cdda25 100644 --- a/vlib/builtin/utf8.v +++ b/vlib/builtin/utf8.v @@ -3,6 +3,7 @@ // that can be found in the LICENSE file. module builtin +// utf8_char_len returns the length in bytes of a UTF-8 encoded codepoint that starts with the byte `b` pub fn utf8_char_len(b u8) int { return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1 } diff --git a/vlib/encoding/utf8/utf8_util.v b/vlib/encoding/utf8/utf8_util.v index 8ad5eb8bf2..34b8f107b2 100644 --- a/vlib/encoding/utf8/utf8_util.v +++ b/vlib/encoding/utf8/utf8_util.v @@ -73,7 +73,7 @@ pub fn get_rune(s string, index int) rune { return res } -// raw_index - get the raw character from the string by the given index value. +// raw_index - get the raw Unicode character from the UTF-8 string by the given index value, as a UTF-8 string. 
// example: utf8.raw_index('我是V Lang', 1) => '是' pub fn raw_index(s string, index int) string { mut r := []rune{} diff --git a/vlib/readline/readline_default.c.v b/vlib/readline/readline_default.c.v index 3661d82f9d..8ed41844b6 100644 --- a/vlib/readline/readline_default.c.v +++ b/vlib/readline/readline_default.c.v @@ -25,7 +25,7 @@ struct Termios { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. @@ -63,7 +63,7 @@ pub fn (mut r Readline) read_line(prompt string) !string { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. diff --git a/vlib/readline/readline_js.js.v b/vlib/readline/readline_js.js.v index 09b7c729c3..15eb8bf389 100644 --- a/vlib/readline/readline_js.js.v +++ b/vlib/readline/readline_js.js.v @@ -10,7 +10,7 @@ struct Termios {} // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. 
-// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. diff --git a/vlib/readline/readline_nix.c.v b/vlib/readline/readline_nix.c.v index dcfe35d609..c34e49c6b8 100644 --- a/vlib/readline/readline_nix.c.v +++ b/vlib/readline/readline_nix.c.v @@ -101,7 +101,7 @@ pub fn (r Readline) read_char() !int { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. @@ -152,7 +152,7 @@ pub fn (mut r Readline) read_line(prompt string) !string { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. 
diff --git a/vlib/readline/readline_windows.c.v b/vlib/readline/readline_windows.c.v index 69f8daed43..f02d078afb 100644 --- a/vlib/readline/readline_windows.c.v +++ b/vlib/readline/readline_windows.c.v @@ -20,7 +20,7 @@ struct Termios { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. @@ -56,7 +56,7 @@ pub fn (mut r Readline) read_line(prompt string) !string { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `[]rune` or +// read_line_utf8 returns the complete UTF-8 input line as a UTF-32 encoded `[]rune` or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct.