From b2ca3ac089dbe0b3443678fc65ff2cb3dbccff0b Mon Sep 17 00:00:00 2001 From: Turiiya <34311583+tobealive@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:10:31 +0200 Subject: [PATCH] net.html: fix semantic inconsistencies of tag retrieving functions (#18558) --- examples/web_crawler/web_crawler.v | 4 +- vlib/net/html/dom.v | 76 ++++++++++++++++++++---------- vlib/net/html/dom_test.v | 16 +++---- vlib/net/html/tag.v | 8 ++-- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/examples/web_crawler/web_crawler.v b/examples/web_crawler/web_crawler.v index 21e69d4b3b..da1ed3342c 100644 --- a/examples/web_crawler/web_crawler.v +++ b/examples/web_crawler/web_crawler.v @@ -12,8 +12,8 @@ fn main() { } // html.parse() parses and returns the DOM from the given text. mut doc := html.parse(resp.body) - // html.DocumentObjectModel.get_tag_by_attribute_value() retrieves all the tags in the document that has the given attribute name and value. - tags := doc.get_tag_by_attribute_value('class', 'list_article_item') + // html.DocumentObjectModel.get_tags_by_attribute_value() retrieves all tags in the document that have the given attribute name and value. + tags := doc.get_tags_by_attribute_value('class', 'list_article_item') for tag in tags { el := tag.children[1].children[0].children[0].children[0] href := el.attributes['href'] or { panic('key not found') } diff --git a/vlib/net/html/dom.v b/vlib/net/html/dom.v index f131c0d48f..fb0751b916 100644 --- a/vlib/net/html/dom.v +++ b/vlib/net/html/dom.v @@ -21,6 +21,11 @@ mut: debug_file os.File } +[params] +pub struct GetTagsOptions { + name string +} + [if debug_html ?] fn (mut dom DocumentObjectModel) print_debug(data string) { if data.len > 0 { @@ -163,8 +168,43 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) { dom.root = tag_list[0] } -// get_tag_by_attribute_value retrieves all the tags in the document that has the given attribute name and value. 
-pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag { +// get_root returns the root of the document. +pub fn (dom DocumentObjectModel) get_root() &Tag { + return dom.root +} + +// get_tag retrieves all tags in the document that have the given tag name. +[deprecated: 'use get_tags instead'] +pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag { + return if name in dom.tag_type { dom.tag_type[name] } else { []&Tag{} } +} + +// get_tags returns all tags stored in the document, or only the tags with the given name when `options.name` is set. +pub fn (dom DocumentObjectModel) get_tags(options GetTagsOptions) []&Tag { + if options.name != '' { + return if options.name in dom.tag_type { dom.tag_type[options.name] } else { []&Tag{} } + } + return dom.all_tags +} + +// get_tags_by_class_name retrieves all tags recursively in the document root that have the given class name(s). +pub fn (dom DocumentObjectModel) get_tags_by_class_name(names ...string) []&Tag { + return dom.root.get_tags_by_class_name(...names) +} + +// get_tag_by_attribute retrieves all tags in the document that have the given attribute name. +[deprecated: 'use get_tags_by_attribute instead'] +pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag { + return if name in dom.all_attributes { dom.all_attributes[name] } else { []&Tag{} } +} + +// get_tags_by_attribute retrieves all tags in the document that have the given attribute name. +pub fn (dom DocumentObjectModel) get_tags_by_attribute(name string) []&Tag { + return if name in dom.all_attributes { dom.all_attributes[name] } else { []&Tag{} } +} + +// get_tags_by_attribute_value retrieves all tags in the document that have the given attribute name and value. 
+pub fn (mut dom DocumentObjectModel) get_tags_by_attribute_value(name string, value string) []&Tag { location := dom.where_is(value, name) return if dom.tag_attributes[name].len > location { dom.tag_attributes[name][location] @@ -173,27 +213,13 @@ pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, val } } -// get_tag retrieves all the tags in the document that has the given tag name. -pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag { - return if name in dom.tag_type { dom.tag_type[name] } else { []&Tag{} } -} - -// get_tag_by_attribute retrieves all the tags in the document that has the given attribute name. -pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag { - return if name in dom.all_attributes { dom.all_attributes[name] } else { []&Tag{} } -} - -// get_root returns the root of the document. -pub fn (dom DocumentObjectModel) get_root() &Tag { - return dom.root -} - -// get_tags returns all of the tags stored in the document. -pub fn (dom DocumentObjectModel) get_tags() []&Tag { - return dom.all_tags -} - -// get_tags_by_class_name retrieves all the tags recursively in the document that has the given class name(s). -pub fn (dom DocumentObjectModel) get_tags_by_class_name(names ...string) []&Tag { - return dom.root.get_tags_by_class_name(...names) +// get_tag_by_attribute_value retrieves all tags in the document that have the given attribute name and value. 
+[deprecated: 'use get_tags_by_attribute_value instead'] +pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag { + location := dom.where_is(value, name) + return if dom.tag_attributes[name].len > location { + dom.tag_attributes[name][location] + } else { + []&Tag{} + } } diff --git a/vlib/net/html/dom_test.v b/vlib/net/html/dom_test.v index cbd8b12340..be46007172 100644 --- a/vlib/net/html/dom_test.v +++ b/vlib/net/html/dom_test.v @@ -15,22 +15,22 @@ fn generate_temp_html() string { fn test_search_by_tag_type() { dom := parse(generate_temp_html()) - assert dom.get_tag('div').len == 4 - assert dom.get_tag('head').len == 1 - assert dom.get_tag('body').len == 1 + assert dom.get_tags(name: 'div').len == 4 + assert dom.get_tags(name: 'head').len == 1 + assert dom.get_tags(name: 'body').len == 1 } fn test_search_by_attribute_value() { mut dom := parse(generate_temp_html()) // println(temp_html) print('Amount ') - println(dom.get_tag_by_attribute_value('id', 'name_0')) - assert dom.get_tag_by_attribute_value('id', 'name_0').len == 1 + println(dom.get_tags_by_attribute_value('id', 'name_0')) + assert dom.get_tags_by_attribute_value('id', 'name_0').len == 1 } fn test_access_parent() { mut dom := parse(generate_temp_html()) - div_tags := dom.get_tag('div') + div_tags := dom.get_tags(name: 'div') parent := div_tags[0].parent assert unsafe { parent != 0 } for div_tag in div_tags { @@ -40,7 +40,7 @@ fn test_access_parent() { fn test_search_by_attributes() { dom := parse(generate_temp_html()) - assert dom.get_tag_by_attribute('id').len == 4 + assert dom.get_tags_by_attribute('id').len == 4 } fn test_tags_used() { @@ -50,7 +50,7 @@ fn test_tags_used() { fn test_access_tag_fields() { dom := parse(generate_temp_html()) - id_tags := dom.get_tag_by_attribute('id') + id_tags := dom.get_tags_by_attribute('id') assert id_tags[0].name == 'div' assert id_tags[1].attributes['class'] == 'several-1' } diff --git a/vlib/net/html/tag.v 
b/vlib/net/html/tag.v index c0df75b7e7..53d7232f61 100644 --- a/vlib/net/html/tag.v +++ b/vlib/net/html/tag.v @@ -82,7 +82,7 @@ pub fn (tag &Tag) get_tag(name string) ?&Tag { return none } -// get_tags retrieves all the child tags recursively in the tag that has the given tag name. +// get_tags retrieves all child tags recursively in the tag that have the given tag name. pub fn (tag &Tag) get_tags(name string) []&Tag { mut res := []&Tag{} for child in tag.children { @@ -107,7 +107,7 @@ pub fn (tag &Tag) get_tag_by_attribute(name string) ?&Tag { return none } -// get_tags_by_attribute retrieves all the child tags recursively in the tag that has the given attribute name. +// get_tags_by_attribute retrieves all child tags recursively in the tag that have the given attribute name. pub fn (tag &Tag) get_tags_by_attribute(name string) []&Tag { mut res := []&Tag{} for child in tag.children { @@ -132,7 +132,7 @@ pub fn (tag &Tag) get_tag_by_attribute_value(name string, value string) ?&Tag { return none } -// get_tags_by_attribute_value retrieves all the child tags recursively in the tag that has the given attribute name and value. +// get_tags_by_attribute_value retrieves all child tags recursively in the tag that have the given attribute name and value. pub fn (tag &Tag) get_tags_by_attribute_value(name string, value string) []&Tag { mut res := []&Tag{} for child in tag.children { @@ -164,7 +164,7 @@ pub fn (tag &Tag) get_tag_by_class_name(names ...string) ?&Tag { return none } -// get_tags_by_class_name retrieves all the child tags recursively in the tag that has the given class name(s). +// get_tags_by_class_name retrieves all child tags recursively in the tag that have the given class name(s). pub fn (tag &Tag) get_tags_by_class_name(names ...string) []&Tag { mut res := []&Tag{} for child in tag.children {