all: unwrap const() blocks

This commit is contained in:
Alexander Medvednikov 2023-11-25 10:02:51 +03:00
parent 399af6768d
commit f09826e928
436 changed files with 10448 additions and 11207 deletions

View file

@ -16,59 +16,77 @@ module regex
import strings
pub const (
v_regex_version = '1.0 alpha' // regex module version
pub const v_regex_version = '1.0 alpha' // regex module version
max_code_len = 256 // default small base code len for the regex programs
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
// spaces chars (here only westerns!!) TODO: manage all the spaces from unicode
spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
// new line chars for now only '\n'
new_line_list = [`\n`, `\r`]
pub const max_code_len = 256 // default small base code len for the regex programs
// Results
no_match_found = -1
pub const max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
// Errors
compile_ok = 0 // the regex string compiled, all ok
err_char_unknown = -2 // the char used is unknow to the system
err_undefined = -3 // the compiler symbol is undefined
err_internal_error = -4 // Bug in the regex system!!
err_cc_alloc_overflow = -5 // memory for char class full!!
err_syntax_error = -6 // syntax error in regex compiling
err_groups_overflow = -7 // max number of groups reached
err_groups_max_nested = -8 // max number of nested group reached
err_group_not_balanced = -9 // group not balanced
err_group_qm_notation = -10 // group invalid notation
err_invalid_or_with_cc = -11 // invalid or on two consecutive char class
err_neg_group_quantifier = -12 // negation groups can not have quantifier
err_consecutive_dots = -13 // two consecutive dots is an error
)
// space chars (western ones only!!) TODO: manage all the spaces from unicode
pub const spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
// new line chars: currently `\n` and `\r`
pub const new_line_list = [`\n`, `\r`]
const (
//*************************************
// regex program instructions
//*************************************
ist_simple_char = u32(0x7FFFFFFF) // single char instruction, 31 bit available to char
// char class 11 0100 AA xxxxxxxx
// AA = 00 regular class
// AA = 01 Negated class ^ char
ist_char_class = u32(0xD1000000) // MASK
ist_char_class_pos = u32(0xD0000000) // char class normal [abc]
ist_char_class_neg = u32(0xD1000000) // char class negate [^abc]
// dot char 10 0110 xx xxxxxxxx
ist_dot_char = u32(0x98000000) // match any char except \n
// backslash chars 10 0100 xx xxxxxxxx
ist_bsls_char = u32(0x90000000) // backslash char
// OR | 10 010Y xx xxxxxxxx
ist_or_branch = u32(0x91000000) // OR case
// groups 10 010Y xx xxxxxxxx
ist_group_start = u32(0x92000000) // group start (
ist_group_end = u32(0x94000000) // group end )
// control instructions
ist_prog_end = u32(0x88000000) // 10 0010 xx xxxxxxxx
//*************************************
)
// Results
pub const no_match_found = -1
// Errors
pub const compile_ok = 0 // the regex string compiled, all ok
pub const err_char_unknown = -2 // the char used is unknown to the system
pub const err_undefined = -3 // the compiler symbol is undefined
pub const err_internal_error = -4 // Bug in the regex system!!
pub const err_cc_alloc_overflow = -5 // memory for char class full!!
pub const err_syntax_error = -6 // syntax error in regex compiling
pub const err_groups_overflow = -7 // max number of groups reached
pub const err_groups_max_nested = -8 // max number of nested group reached
pub const err_group_not_balanced = -9 // group not balanced
pub const err_group_qm_notation = -10 // group invalid notation
pub const err_invalid_or_with_cc = -11 // invalid or on two consecutive char class
pub const err_neg_group_quantifier = -12 // negation groups can not have quantifier
pub const err_consecutive_dots = -13 // two consecutive dots is an error
//*************************************
// regex program instructions
//*************************************
const ist_simple_char = u32(0x7FFFFFFF) // single char instruction, 31 bit available to char
// char class 11 0100 AA xxxxxxxx
// AA = 00 regular class
// AA = 01 Negated class ^ char
// NOTE(review): the mask below has the same value as ist_char_class_neg
const ist_char_class = u32(0xD1000000) // MASK
const ist_char_class_pos = u32(0xD0000000) // char class normal [abc]
const ist_char_class_neg = u32(0xD1000000) // char class negate [^abc]
// dot char 10 0110 xx xxxxxxxx
const ist_dot_char = u32(0x98000000) // match any char except \n
// backslash chars 10 0100 xx xxxxxxxx
const ist_bsls_char = u32(0x90000000) // backslash char
// OR | 10 010Y xx xxxxxxxx
const ist_or_branch = u32(0x91000000) // OR case
// groups 10 010Y xx xxxxxxxx
const ist_group_start = u32(0x92000000) // group start (
const ist_group_end = u32(0x94000000) // group end )
// control instructions
const ist_prog_end = u32(0x88000000) // 10 0010 xx xxxxxxxx
/*
General Utilities
@ -275,16 +293,18 @@ fn (mut tok Token) reset() {
* Regex struct
*
******************************************************************************/
pub const (
f_nl = 0x00000001 // end the match when find a new line symbol
f_ms = 0x00000002 // match true only if the match is at the start of the string
f_me = 0x00000004 // match true only if the match is at the end of the string
pub const f_nl = 0x00000001 // end the match when find a new line symbol
f_efm = 0x00000100 // exit on first token matched, used by search
f_bin = 0x00000200 // work only on bytes, ignore utf-8
// behaviour modifier flags
f_src = 0x00020000 // search mode enabled
)
pub const f_ms = 0x00000002 // match true only if the match is at the start of the string
pub const f_me = 0x00000004 // match true only if the match is at the end of the string
pub const f_efm = 0x00000100 // exit on first token matched, used by search
pub const f_bin = 0x00000200 // work only on bytes, ignore utf-8
// behaviour modifier flags
pub const f_src = 0x00020000 // search mode enabled
// Log function prototype
pub type FnLog = fn (string)
@ -379,22 +399,20 @@ struct BslsStruct {
validator FnValidator = unsafe { nil } // validator function pointer
}
const (
bsls_validator_array = [
BslsStruct{`w`, is_alnum},
BslsStruct{`W`, is_not_alnum},
BslsStruct{`s`, is_space},
BslsStruct{`S`, is_not_space},
BslsStruct{`d`, is_digit},
BslsStruct{`D`, is_not_digit},
BslsStruct{`a`, is_lower},
BslsStruct{`A`, is_upper},
]
// table of backslash classes: each entry pairs the letter that follows the `\`
// with the validator function stored in BslsStruct.validator
const bsls_validator_array = [
BslsStruct{`w`, is_alnum},
BslsStruct{`W`, is_not_alnum},
BslsStruct{`s`, is_space},
BslsStruct{`S`, is_not_space},
BslsStruct{`d`, is_digit},
BslsStruct{`D`, is_not_digit},
BslsStruct{`a`, is_lower},
BslsStruct{`A`, is_upper},
]
// these chars are escape if preceded by a \
bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`,
`^`, `!`]
)
// these chars are escaped if preceded by a \
const bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`,
`^`, `!`]
enum BSLS_parse_state {
start
@ -450,13 +468,15 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int, int) {
* Char class
*
******************************************************************************/
const (
cc_null = 0 // empty cc token
cc_char = 1 // simple char: a
cc_int = 2 // char interval: a-z
cc_bsls = 3 // backslash char
cc_end = 4 // cc sequence terminator
)
// char class (cc) token kinds
const cc_null = 0 // empty cc token
const cc_char = 1 // simple char: a
const cc_int = 2 // char interval: a-z
const cc_bsls = 3 // backslash char
const cc_end = 4 // cc sequence terminator
struct CharClass {
mut:
@ -910,9 +930,7 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, bool, string, int)
return -2, true, false, name, i
}
const (
quntifier_chars = [rune(`+`), `*`, `?`, `{`]
)
// chars that start a quantifier: `+`, `*`, `?`, `{`
// NOTE(review): the identifier is misspelled ("quntifier"); renaming it needs every use updated
const quntifier_chars = [rune(`+`), `*`, `?`, `{`]
//
// main compiler

View file

@ -776,17 +776,15 @@ fn rest_regex_replace_n() {
}
// test quantifier wrong sequences
const (
test_quantifier_sequences_list = [
r'+{3}.*+{3}',
r'+{3}.*?{3}',
r'+{3}.**{3}',
r'+{3}.*\+{3}*',
r'+{3}.*\+{3}+',
r'+{3}.*\+{3}??',
r'+{3}.*\+{3}{4}',
]
)
// patterns iterated by test_quantifier_sequences(); each one contains a
// quantifier sequence that is presumably expected to be rejected - confirm in the test body
const test_quantifier_sequences_list = [
r'+{3}.*+{3}',
r'+{3}.*?{3}',
r'+{3}.**{3}',
r'+{3}.*\+{3}*',
r'+{3}.*\+{3}+',
r'+{3}.*\+{3}??',
r'+{3}.*\+{3}{4}',
]
fn test_quantifier_sequences() {
for pattern in test_quantifier_sequences_list {
@ -863,11 +861,9 @@ fn test_groups_in_find() {
}
}
const (
err_query_list = [
r'([a]|[b])*',
]
)
// regex queries for the error tests
// NOTE(review): presumably consumed by test_errors() below - confirm
const err_query_list = [
r'([a]|[b])*',
]
fn test_errors() {
mut count := 0
@ -923,16 +919,14 @@ struct Test_negation_group {
res bool
}
const (
negation_groups = [
Test_negation_group{'automobile', false},
Test_negation_group{'botomobile', true},
Test_negation_group{'auto_caravan', false},
Test_negation_group{'moto_mobile', true},
Test_negation_group{'pippole', true},
Test_negation_group{'boring test', false},
]
)
// fixtures for the negation group tests: an input text paired with the `res` flag
// NOTE(review): `res` is presumably the expected match outcome - confirm in test_negation_groups()
const negation_groups = [
Test_negation_group{'automobile', false},
Test_negation_group{'botomobile', true},
Test_negation_group{'auto_caravan', false},
Test_negation_group{'moto_mobile', true},
Test_negation_group{'pippole', true},
Test_negation_group{'boring test', false},
]
fn test_negation_groups() {
mut query := r'(?!auto)\w+le'