From 0898f579955738c855b9cf6ee6c5329a838f5122 Mon Sep 17 00:00:00 2001 From: Casper Kuethe <43839798+Casper64@users.noreply.github.com> Date: Sun, 30 Apr 2023 15:20:24 +0200 Subject: [PATCH] net.html: fix text parsing for inline tags (#18085) --- vlib/net/html/html_test.v | 17 +++++++++++++++++ vlib/net/html/parser.v | 14 ++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/vlib/net/html/html_test.v b/vlib/net/html/html_test.v index 51271cd9e5..2a518a2ddb 100644 --- a/vlib/net/html/html_test.v +++ b/vlib/net/html/html_test.v @@ -13,3 +13,20 @@ fn test_parse() { assert h1_tag.str() == '

Hello world!

' // assert h1_tag.str() == '

Hello world!

' } + +fn test_parse_inline_tags() { + doc := parse('

before in between after

') + tags := doc.get_tag('span') + assert tags.len == 1 + + span_tag := tags[0] + assert span_tag.str() == 'in between' + + p_tags := doc.get_tag('p') + assert p_tags.len == 1 + + p_tag := p_tags[0] + assert p_tag.str() == '

before in between after

' + + assert p_tag.text() == 'before in between after' +} diff --git a/vlib/net/html/parser.v b/vlib/net/html/parser.v index 73997b6b9e..c54f1590fe 100644 --- a/vlib/net/html/parser.v +++ b/vlib/net/html/parser.v @@ -14,6 +14,7 @@ mut: opened_code_type string line_count int outside_tag bool + text_after_tag bool lexeme_builder strings.Builder = strings.new_builder(100) code_tags map[string]bool = { 'script': true @@ -221,10 +222,7 @@ pub fn (mut parser Parser) split_parse(data string) { if parser.lexical_attributes.current_tag.name.len > 1 && parser.lexical_attributes.current_tag.name[0] == 47 && !blank_string(temp_string) { - parser.tags << &Tag{ - name: 'text' - content: temp_string - } + parser.lexical_attributes.text_after_tag = true } else { parser.lexical_attributes.current_tag.content = temp_string // verify later who has this content } @@ -234,6 +232,14 @@ pub fn (mut parser Parser) split_parse(data string) { parser.generate_tag() parser.lexical_attributes.open_tag = true parser.lexical_attributes.outside_tag = false + + if parser.lexical_attributes.text_after_tag == true { + parser.tags << &Tag{ + name: 'text' + content: temp_string + } + parser.lexical_attributes.text_after_tag = false + } } else { parser.lexical_attributes.lexeme_builder.write_u8(chr) }