net.html: fix panic in html.parse() called with empty string, remove replacement of \n in the original content (#17206)

This commit is contained in:
walking devel 2023-02-02 22:32:03 +00:00 committed by GitHub
parent a8102f14be
commit 8cdc554c63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 4 deletions

View file

@ -13,6 +13,7 @@ mut:
is_attribute bool
opened_code_type string
line_count int
outside_tag bool
lexeme_builder strings.Builder = strings.new_builder(100)
code_tags map[string]bool = {
'script': true
@ -90,6 +91,7 @@ fn (mut parser Parser) init() {
parser.tags = []&Tag{}
parser.dom.close_tags['/!document'] = true
parser.lexical_attributes.current_tag = &Tag{}
parser.lexical_attributes.outside_tag = true
parser.initialized = true
}
@ -231,19 +233,40 @@ pub fn (mut parser Parser) split_parse(data string) {
parser.lexical_attributes.lexeme_builder.go_back_to(0)
parser.generate_tag()
parser.lexical_attributes.open_tag = true
parser.lexical_attributes.outside_tag = false
} else {
parser.lexical_attributes.lexeme_builder.write_u8(chr)
}
}
// If `data` has no tags, only text.
if parser.lexical_attributes.outside_tag {
temp_string := parser.lexical_attributes.lexeme_builder.str()
if parser.tags.len == 0 {
parser.tags << &Tag{
name: 'text'
content: temp_string
}
} else if parser.tags.len == 1 {
mut tag := parser.tags.first()
if tag.name == 'text' {
tag.content += temp_string
}
}
}
}
// parse_html parses the given HTML string
pub fn (mut parser Parser) parse_html(data string) {
parser.init()
mut lines := data.split_into_lines()
for line in lines {
for index, line in lines {
parser.lexical_attributes.line_count++
parser.split_parse(line)
// The parser must not replace `\n`: doing so can break JS code or make adjacent text stick together.
// `split_into_lines()` drops the line endings, so `\n` is re-added to every line except the last.
parser.split_parse(if index < lines.len - 1 { '${line}\n' } else { line })
}
parser.generate_tag()
parser.dom.debug_file = parser.debug_file