diff --git a/.gitignore b/.gitignore index 905d2bf174..ca30ee1b9c 100644 --- a/.gitignore +++ b/.gitignore @@ -130,4 +130,9 @@ vls.log wasm.v TAGS tags -vlib/builtin/js/*.js + +# ignore large GTK *.gir files +Gtk-4.0.gir +*.gir + +vlib/builtin/js/*.js \ No newline at end of file diff --git a/vlib/encoding/xml/README.md b/vlib/encoding/xml/README.md new file mode 100644 index 0000000000..161baf1af9 --- /dev/null +++ b/vlib/encoding/xml/README.md @@ -0,0 +1,44 @@ +## Description + +`xml` is a module to parse XML documents into a tree structure. It also supports +validation of XML documents against a DTD. + +Note that this is not a streaming XML parser. It reads the entire document into +memory and then parses it. This is not a problem for small documents, but it +might be a problem for extremely large documents (several hundred megabytes or more). + +## Usage + +### Parsing XML Files + +There are three different ways to parse an XML Document: + +1. Pass the entire XML document as a string to `XMLDocument.from_string`. +2. Specify a file path to `XMLDocument.from_file`. +3. Use a source that implements `io.Reader` and pass it to `XMLDocument.from_reader`. + +```v +import encoding.xml + +//... +doc := xml.XMLDocument.from_file('test/sample.xml')! +``` + +### Validating XML Documents + +Simply call `validate` on the parsed XML document. + +### Querying + +Check the `get_element...` methods defined on the XMLDocument struct. + +### Escaping and Un-escaping XML Entities + +When the `validate` method is called, the XML document is parsed and all text +nodes are un-escaped. This means that the text nodes will contain the actual +text and not the escaped version of the text. + +When the XML document is serialized (using `str` or `pretty_str`), all text nodes are escaped. + +The escaping and un-escaping can also be done manually using the `escape_text` and +`unescape_text` methods. 
diff --git a/vlib/encoding/xml/encoding.v b/vlib/encoding/xml/encoding.v new file mode 100644 index 0000000000..2ef924af8b --- /dev/null +++ b/vlib/encoding/xml/encoding.v @@ -0,0 +1,148 @@ +module xml + +import strings + +// pretty_str returns a pretty-printed version of the XML node. It requires the string used for one +// level of indentation, the depth of the node in the tree, and a map of reverse entities to use when +// escaping text. +pub fn (node XMLNode) pretty_str(original_indent string, depth int, reverse_entities map[string]string) string { + // Create the proper indentation first + mut indent_builder := strings.new_builder(original_indent.len * depth) + for _ in 0 .. depth { + indent_builder.write_string(original_indent) + } + indent := indent_builder.str() + + // Now we can stringify the node + mut builder := strings.new_builder(1024) + builder.write_string(indent) + builder.write_u8(`<`) + builder.write_string(node.name) + + for key, value in node.attributes { + builder.write_u8(` `) + builder.write_string(key) + builder.write_string('="') + builder.write_string(value) + builder.write_u8(`"`) + } + builder.write_string('>\n') + for child in node.children { + match child { + string { + builder.write_string(indent) + builder.write_string(original_indent) + builder.write_string(escape_text(child, reverse_entities: reverse_entities)) + } + XMLNode { + builder.write_string(child.pretty_str(original_indent, depth + 1, reverse_entities)) + } + XMLComment { + builder.write_string(indent) + builder.write_string(original_indent) + builder.write_string('') + } + XMLCData { + builder.write_string(indent) + builder.write_string(original_indent) + builder.write_string('') + } + } + builder.write_u8(`\n`) + } + builder.write_string(indent) + builder.write_string('`) + return builder.str() +} + +fn (list []DTDListItem) pretty_str(indent string) string { + if list.len == 0 { + return '' + } + + mut builder := strings.new_builder(1024) + builder.write_u8(`[`) + 
builder.write_u8(`\n`) + + for item in list { + match item { + DTDEntity { + builder.write_string('${indent}') + } + DTDElement { + builder.write_string('${indent}') + } + } + builder.write_u8(`\n`) + } + builder.write_u8(`]`) + return builder.str() +} + +fn (doctype DocumentType) pretty_str(indent string) string { + match doctype.dtd { + string { + content := doctype.dtd + return if content.len > 0 { + '' + } else { + '' + } + } + DocumentTypeDefinition { + if doctype.dtd.list.len == 0 { + return '' + } + + mut builder := strings.new_builder(1024) + builder.write_string('') + builder.write_u8(`\n`) + return builder.str() + } + } +} + +// pretty_str returns a pretty-printed version of the XML document. It requires the string used to +// indent each level of the document. +pub fn (doc XMLDocument) pretty_str(indent string) string { + mut document_builder := strings.new_builder(1024) + + prolog := '' + comments := if doc.comments.len > 0 { + mut comments_buffer := strings.new_builder(512) + for comment in doc.comments { + comments_buffer.write_string('') + comments_buffer.write_u8(`\n`) + } + comments_buffer.str() + } else { + '' + } + + document_builder.write_string(prolog) + document_builder.write_u8(`\n`) + document_builder.write_string(doc.doctype.pretty_str(indent)) + document_builder.write_u8(`\n`) + document_builder.write_string(comments) + document_builder.write_string(doc.root.pretty_str(indent, 0, doc.parsed_reverse_entities)) + + return document_builder.str() +} + +// str returns a string representation of the XML document. It uses a 2-space indentation +// to pretty-print the document. 
+pub fn (doc XMLDocument) str() string { + return doc.pretty_str(' ') +} diff --git a/vlib/encoding/xml/entity.v b/vlib/encoding/xml/entity.v new file mode 100644 index 0000000000..708fc06903 --- /dev/null +++ b/vlib/encoding/xml/entity.v @@ -0,0 +1,79 @@ +module xml + +import strings + +pub const default_entities = { + 'lt': '<' + 'gt': '>' + 'amp': '&' + 'apos': "'" + 'quot': '"' +} + +pub const default_entities_reverse = { + '<': 'lt' + '>': 'gt' + '&': 'amp' + "'": 'apos' + '"': 'quot' +} + +[params] +pub struct EscapeConfig { + reverse_entities map[string]string = xml.default_entities_reverse +} + +// escape_text replaces every occurrence of a key of `config.reverse_entities` in the given string +// with its XML entity reference (`&name;`). Defaults to default_entities_reverse, which can be overridden. +pub fn escape_text(content string, config EscapeConfig) string { + mut flattened_entities := []string{cap: 2 * config.reverse_entities.len} + + for target, replacement in config.reverse_entities { + flattened_entities << target + flattened_entities << '&' + replacement + ';' + } + + return content.replace_each(flattened_entities) +} + +[params] +pub struct UnescapeConfig { + entities map[string]string = xml.default_entities +} + +// unescape_text replaces all entity references (`&name;`) in the given string with their respective +// original characters or strings. Defaults to default_entities, which can be overridden. +pub fn unescape_text(content string, config UnescapeConfig) !string { + mut buffer := strings.new_builder(content.len) + mut index := 0 + runes := content.runes() + for index < runes.len { + match runes[index] { + `&` { + mut offset := 1 + mut entity_buf := strings.new_builder(8) + for index + offset < runes.len && runes[index + offset] != `;` { + entity_buf.write_rune(runes[index + offset]) + offset++ + } + // Did we reach the end of the string? + if index + offset == runes.len { + return error('Unexpected end of string while parsing entity.') + } + // Did we find a valid entity? 
+ entity := entity_buf.str() + if entity in config.entities { + buffer.write_string(config.entities[entity]) + index += offset + } else { + return error('Unknown entity: ' + entity) + } + } + else { + buffer.write_rune(runes[index]) + } + } + index++ + } + return buffer.str() +} diff --git a/vlib/encoding/xml/entity_test.v b/vlib/encoding/xml/entity_test.v new file mode 100644 index 0000000000..6e53b3c032 --- /dev/null +++ b/vlib/encoding/xml/entity_test.v @@ -0,0 +1,35 @@ +module main + +import encoding.xml + +fn test_escape() { + assert xml.escape_text('Normal string') == 'Normal string' + assert xml.escape_text('12 < 34') == '12 < 34' + assert xml.escape_text('12 > 34') == '12 > 34' + assert xml.escape_text('12 & 34') == '12 & 34' + assert xml.escape_text('He said, "Very well, let us proceed."') == 'He said, "Very well, let us proceed."' + assert xml.escape_text("He said, 'Very well, let us proceed.'") == 'He said, 'Very well, let us proceed.'' + + assert xml.escape_text('Do not escape ©.') == 'Do not escape ©.' + + mut reverse_entities := xml.default_entities_reverse.clone() + reverse_entities['©'] = 'copy' + assert xml.escape_text('Do escape ©.', reverse_entities: reverse_entities) == 'Do escape ©.' +} + +fn test_unescape() ! { + assert xml.unescape_text('Normal string')! == 'Normal string' + assert xml.unescape_text('12 < 34')! == '12 < 34' + assert xml.unescape_text('12 > 34')! == '12 > 34' + assert xml.unescape_text('12 & 34')! == '12 & 34' + assert xml.unescape_text('He said, "Very well, let us proceed."')! == 'He said, "Very well, let us proceed."' + assert xml.unescape_text('He said, 'Very well, let us proceed.'')! 
== "He said, 'Very well, let us proceed.'" + + xml.unescape_text('12 &invalid; 34') or { assert err.msg() == 'Unknown entity: invalid' } + + xml.unescape_text('Do not unescape ©') or { assert err.msg() == 'Unknown entity: copy' } + + mut entities := xml.default_entities.clone() + entities['copy'] = '©' + assert xml.unescape_text('Do unescape ©.', entities: entities)! == 'Do unescape ©.' +} diff --git a/vlib/encoding/xml/parser.v b/vlib/encoding/xml/parser.v new file mode 100644 index 0000000000..b4a8d55057 --- /dev/null +++ b/vlib/encoding/xml/parser.v @@ -0,0 +1,604 @@ +module xml + +import io +import os +import strings + +const ( + default_prolog_attributes = { + 'version': '1.0' + 'encoding': 'UTF-8' + } + default_string_builder_cap = 32 + + element_len = '` { + break + } + return error('XML Comment not closed. Expected ">".') + } else { + comment_buffer.write_u8(ch) + comment_buffer.write_u8(after_ch) + } + } + else { + comment_buffer.write_u8(ch) + } + } + } + + comment_contents := comment_buffer.str() + return XMLComment{comment_contents} +} + +enum CDATAParserState { + normal + single + double +} + +fn parse_cdata(mut reader io.Reader) !XMLCData { + mut contents_buf := strings.new_builder(xml.default_string_builder_cap) + + mut state := CDATAParserState.normal + mut local_buf := [u8(0)] + + for { + ch := next_char(mut reader, mut local_buf)! + contents_buf.write_u8(ch) + match ch { + `]` { + match state { + .double { + // Another ] after the ]] for some reason. 
Keep the state + } + .single { + state = .double + } + .normal { + state = .single + } + } + } + `>` { + match state { + .double { + break + } + else { + state = .normal + } + } + } + else { + state = .normal + } + } + } + + contents := contents_buf.str().trim_space() + if !contents.ends_with(']]>') { + return error('CDATA section not closed.') + } + return XMLCData{contents[1..contents.len - 3]} +} + +fn parse_entity(contents string) !(DTDEntity, string) { + // We find the nearest '>' to the start of the ENTITY + entity_end := contents.index('>') or { return error('Entity declaration not closed.') } + entity_contents := contents[xml.entity_len..entity_end] + + name := entity_contents.trim_left(' \t\n').all_before(' ') + if name.len == 0 { + return error('Entity is missing name.') + } + value := entity_contents.all_after_first(name).trim_space().trim('"\'') + if value.len == 0 { + return error('Entity is missing value.') + } + + // TODO: Add support for SYSTEM and PUBLIC entities + + return DTDEntity{name, value}, contents[entity_end + 1..] +} + +fn parse_element(contents string) !(DTDElement, string) { + // We find the nearest '>' to the start of the ELEMENT + element_end := contents.index('>') or { return error('Element declaration not closed.') } + element_contents := contents[xml.element_len..element_end].trim_left(' \t\n') + + mut name_span := TextSpan{} + + for ch in element_contents { + match ch { + ` `, `\t`, `\n` { + break + } + // Valid characters in an element name are: + // 1. Lowercase alphabet - a-z + // 2. Uppercase alphabet - A-Z + // 3. Numbers - 0-9 + // 4. Underscore - _ + // 5. Colon - : + // 6. Period - . 
+ `a`...`z`, `A`...`Z`, `0`...`9`, `_`, `:`, `.` { + name_span.end++ + } + else { + return error('Invalid character in element name: "${ch}"') + } + } + } + + name := element_contents[name_span.start..name_span.end].trim_left(' \t\n') + if name.len == 0 { + return error('Element is missing name.') + } + definition_string := element_contents.all_after_first(name).trim_space().trim('"\'') + + definition := if definition_string.starts_with('(') { + // We have a list of possible children + + // Ensure that both ( and ) are present + if !definition_string.ends_with(')') { + return error('Element declaration not closed.') + } + + definition_string.trim('()').split(',') + } else { + // Invalid definition + return error('Invalid element definition: ${definition_string}') + } + + // TODO: Add support for SYSTEM and PUBLIC entities + + return DTDElement{name, definition}, contents[element_end + 1..] +} + +fn parse_doctype(mut reader io.Reader) !DocumentType { + // We may have more < in the doctype so keep count + mut depth := 1 + mut doctype_buffer := strings.new_builder(xml.default_string_builder_cap) + mut local_buf := [u8(0)] + for { + ch := next_char(mut reader, mut local_buf)! 
+ doctype_buffer.write_u8(ch) + match ch { + `<` { + depth++ + } + `>` { + depth-- + if depth == 0 { + break + } + } + else {} + } + } + + doctype_contents := doctype_buffer.str().trim_space() + + name := doctype_contents.all_before('[').trim_space() + + mut list_contents := doctype_contents.all_after('[').all_before(']').trim_space() + mut items := []DTDListItem{} + + for list_contents.len > 0 { + if list_contents.starts_with('` { + if found_question_mark { + break + } + return error('Invalid prolog: Found ">" before "?".') + } + else { + if found_question_mark { + found_question_mark = false + prolog_buffer.write_u8(`?`) + } + prolog_buffer.write_u8(ch) + } + } + } + + prolog_attributes := prolog_buffer.str().trim_space() + + attributes := if prolog_attributes.len == 0 { + xml.default_prolog_attributes + } else { + parse_attributes(prolog_attributes)! + } + + version := attributes['version'] or { return error('XML declaration missing version.') } + encoding := attributes['encoding'] or { 'UTF-8' } + + mut comments := []XMLComment{} + mut doctype := DocumentType{ + name: '' + dtd: '' + } + mut found_doctype := false + for { + ch = next_char(mut reader, mut local_buf)! + match ch { + ` `, `\t`, `\n` { + continue + } + `<` { + // We have a comment, DOCTYPE, or root node + ch = next_char(mut reader, mut local_buf)! + match ch { + `!` { + // A comment or DOCTYPE + match next_char(mut reader, mut local_buf)! { + `-` { + // A comment + if next_char(mut reader, mut local_buf)! != `-` { + return error('Invalid comment.') + } + comments << parse_comment(mut reader)! + } + `D` { + if found_doctype { + return error('Duplicate DOCTYPE declaration.') + } + // OCTYPE + mut doc_buf := []u8{len: 6} + if reader.read(mut doc_buf)! != 6 { + return error('Invalid DOCTYPE.') + } + if doc_buf != xml.doctype_chars { + return error('Invalid DOCTYPE.') + } + found_doctype = true + doctype = parse_doctype(mut reader)! 
+ } + else { + return error('Unsupported control sequence found in prolog.') + } + } + } + else { + // We have found the start of the root node + break + } + } + } + else {} + } + } + + return Prolog{ + version: version + encoding: encoding + doctype: doctype + comments: comments + }, ch +} + +fn parse_children(name string, attributes map[string]string, mut reader io.Reader) !XMLNode { + mut inner_contents := strings.new_builder(xml.default_string_builder_cap) + + mut children := []XMLNodeContents{} + mut local_buf := [u8(0)] + + for { + ch := next_char(mut reader, mut local_buf)! + match ch { + `<` { + second_char := next_char(mut reader, mut local_buf)! + match second_char { + `!` { + // Comment, CDATA + mut next_two := [u8(0), 0] + if reader.read(mut next_two)! != 2 { + return error('Invalid XML. Incomplete comment or CDATA declaration.') + } + if next_two == xml.double_dash { + // Comment + comment := parse_comment(mut reader)! + children << comment + } else if next_two == xml.c_tag { + // DATA + mut cdata_buf := []u8{len: 4} + if reader.read(mut cdata_buf)! != 4 { + return error('Invalid XML. Incomplete CDATA declaration.') + } + if cdata_buf != xml.data_chars { + return error('Invalid XML. 
Expected "CDATA" after "` + + if node_end_buffer != ending_chars { + return error('XML node <${name}> not closed.') + } + + collected_contents := inner_contents.str().trim_space() + if collected_contents.len > 0 { + // We have some inner text + children << collected_contents.replace('\r\n', '\n') + } + return XMLNode{ + name: name + attributes: attributes + children: children + } + } + else { + // Start of child node + child := parse_single_node(second_char, mut reader) or { + if err.msg() == 'XML node cannot start with " not closed.') + } else { + return err + } + } + text := inner_contents.str().trim_space() + if text.len > 0 { + children << text.replace('\r\n', '\n') + } + children << child + } + } + } + else { + inner_contents.write_u8(ch) + } + } + } + return error('XML node <${name}> not closed.') +} + +fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode { + mut local_buf := [u8(0)] + mut ch := next_char(mut reader, mut local_buf)! + mut contents := strings.new_builder(xml.default_string_builder_cap) + // We're expecting an opening tag + if ch == `/` { + return error('XML node cannot start with "` { + break + } + contents.write_u8(ch) + } + + tag_contents := contents.str().trim_space() + + parts := tag_contents.split_any(' \t\n') + name := first_char.ascii_str() + parts[0] + + // Check if it is a self-closing tag + if tag_contents.ends_with('/') { + // We're not looking for children and inner text + return XMLNode{ + name: name + attributes: parse_attributes(tag_contents[name.len - 1..tag_contents.len].trim_space())! + } + } + + attribute_string := tag_contents[name.len - 1..].trim_space() + attributes := parse_attributes(attribute_string)! + + return parse_children(name, attributes, mut reader) +} + +// XMLDocument.from_string parses an XML document from a string. 
+pub fn XMLDocument.from_string(raw_contents string) !XMLDocument { + mut reader := FullBufferReader{ + contents: raw_contents.bytes() + } + return XMLDocument.from_reader(mut reader)! +} + +// XMLDocument.from_file parses an XML document from a file. Note that the file is read in its entirety +// and then parsed. If the file is too large, try using the XMLDocument.from_reader function instead. +pub fn XMLDocument.from_file(path string) !XMLDocument { + mut reader := FullBufferReader{ + contents: os.read_bytes(path)! + } + return XMLDocument.from_reader(mut reader)! +} + +// XMLDocument.from_reader parses an XML document from a reader. This is the most generic way to parse +// an XML document from any arbitrary source that implements the io.Reader interface. +pub fn XMLDocument.from_reader(mut reader io.Reader) !XMLDocument { + prolog, first_char := parse_prolog(mut reader) or { + if err is os.Eof || err is io.Eof || err.msg() == 'Unexpected End Of File.' { + return error('XML document is empty.') + } else { + return err + } + } + + root := parse_single_node(first_char, mut reader)! + + return XMLDocument{ + version: prolog.version + encoding: prolog.encoding + comments: prolog.comments + doctype: prolog.doctype + root: root + } +} diff --git a/vlib/encoding/xml/query.v b/vlib/encoding/xml/query.v new file mode 100644 index 0000000000..9d310aff7f --- /dev/null +++ b/vlib/encoding/xml/query.v @@ -0,0 +1,60 @@ +module xml + +fn (node XMLNode) get_element_by_id(id string) ?XMLNode { + // Is this the node we're looking for? 
+ if attribute_id := node.attributes['id'] { + if attribute_id == id { + return node + } + } + + if node.children.len == 0 { + return none + } + + // Recurse into children + for child in node.children { + match child { + XMLNode { + if result := child.get_element_by_id(id) { + return result + } + } + else {} + } + } + + return none +} + +fn (node XMLNode) get_elements_by_tag(tag string) []XMLNode { + mut result := []XMLNode{} + + if node.name == tag { + result << node + } + + if node.children.len == 0 { + return result + } + + // Recurse into children + for child in node.children { + if child is XMLNode { + result << child.get_elements_by_tag(tag) + } + } + + return result +} + +// get_element_by_id returns the first element with the given id, or none if no +// such element exists. +pub fn (doc XMLDocument) get_element_by_id(id string) ?XMLNode { + return doc.root.get_element_by_id(id) +} + +// get_elements_by_tag returns all elements with the given tag name. +pub fn (doc XMLDocument) get_elements_by_tag(tag string) []XMLNode { + return doc.root.get_elements_by_tag(tag) +} diff --git a/vlib/encoding/xml/reader_util.v b/vlib/encoding/xml/reader_util.v new file mode 100644 index 0000000000..9ea26be97e --- /dev/null +++ b/vlib/encoding/xml/reader_util.v @@ -0,0 +1,30 @@ +module xml + +import io + +fn next_char(mut reader io.Reader, mut buf []u8) !u8 { + if reader.read(mut buf)! 
== 0 { + return error('Unexpected End Of File.') + } + return buf[0] +} + +struct FullBufferReader { + contents []u8 +mut: + position int +} + +[direct_array_access] +fn (mut fbr FullBufferReader) read(mut buf []u8) !int { + if fbr.position >= fbr.contents.len { + return io.Eof{} + } + remaining := fbr.contents.len - fbr.position + n := if buf.len < remaining { buf.len } else { remaining } + unsafe { + vmemcpy(&u8(buf.data), &u8(fbr.contents.data) + fbr.position, n) + } + fbr.position += n + return n +} diff --git a/vlib/encoding/xml/test/gtk/gtk_test.v b/vlib/encoding/xml/test/gtk/gtk_test.v new file mode 100644 index 0000000000..bb8c13f38c --- /dev/null +++ b/vlib/encoding/xml/test/gtk/gtk_test.v @@ -0,0 +1,89 @@ +module main + +import encoding.xml +import os + +fn test_large_gtk_file() ! { + // Note: If you are contributing to this project, you should download the + // GIR file from https://raw.githubusercontent.com/gtk-rs/gir-files/master/Gtk-4.0.gir + // and place it in the same directory as this file. + path := os.join_path(os.dir(@FILE), 'Gtk-4.0.gir') + if !os.exists(path) { + println('Skipping test_large_gtk_file because file does not exist.') + return + } + + actual := xml.XMLDocument.from_file(path) or { + return error('Failed to parse large GTK XML file') + } + + mut valid := false + for elm in actual.get_elements_by_tag('class') { + if 'c:type' in elm.attributes && elm.attributes['c:type'] == 'GtkWindow' { + assert elm.attributes['parent'] == 'Widget' + assert elm.attributes['c:symbol-prefix'] == 'window' + valid = true + } + } + assert valid, 'GtkWindow class not found!' 
+ + valid = false + for elm in actual.get_elements_by_tag('constructor') { + if 'c:identifier' in elm.attributes && elm.attributes['c:identifier'] == 'gtk_window_new' { + assert elm == xml.XMLNode{ + name: 'constructor' + attributes: { + 'name': 'new' + 'c:identifier': 'gtk_window_new' + } + children: [ + xml.XMLNodeContents(xml.XMLNode{ + name: 'doc' + attributes: { + 'xml:space': 'preserve' + } + children: [ + xml.XMLNodeContents('Creates a new `GtkWindow`. + +To get an undecorated window (no window borders), use +[method@Gtk.Window.set_decorated]. + +All top-level windows created by gtk_window_new() are stored +in an internal top-level window list. This list can be obtained +from [func@Gtk.Window.list_toplevels]. Due to GTK keeping a +reference to the window internally, gtk_window_new() does not +return a reference to the caller. + +To delete a `GtkWindow`, call [method@Gtk.Window.destroy].'), + ] + }), + xml.XMLNodeContents(xml.XMLNode{ + name: 'return-value' + attributes: { + 'transfer-ownership': 'none' + } + children: [ + xml.XMLNodeContents(xml.XMLNode{ + name: 'doc' + attributes: { + 'xml:space': 'preserve' + } + children: [xml.XMLNodeContents('a new `GtkWindow`.')] + }), + xml.XMLNodeContents(xml.XMLNode{ + name: 'type' + attributes: { + 'name': 'Widget' + 'c:type': 'GtkWidget*' + } + children: [] + }), + ] + }), + ] + } + valid = true + } + } + assert valid, 'gtk_window_new constructor not found!' +} diff --git a/vlib/encoding/xml/test/local/01_mdn_example/hello_world.xml b/vlib/encoding/xml/test/local/01_mdn_example/hello_world.xml new file mode 100644 index 0000000000..0ecb2e96d5 --- /dev/null +++ b/vlib/encoding/xml/test/local/01_mdn_example/hello_world.xml @@ -0,0 +1,6 @@ + + + + Hello, World! 
+ + diff --git a/vlib/encoding/xml/test/local/01_mdn_example/hello_world_test.v b/vlib/encoding/xml/test/local/01_mdn_example/hello_world_test.v new file mode 100644 index 0000000000..78cff10ff9 --- /dev/null +++ b/vlib/encoding/xml/test/local/01_mdn_example/hello_world_test.v @@ -0,0 +1,23 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'hello_world.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'message' + children: [ + xml.XMLNode{ + name: 'greeting' + children: [ + 'Hello, World!', + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/02_note_message/note.xml b/vlib/encoding/xml/test/local/02_note_message/note.xml new file mode 100644 index 0000000000..97de6219ff --- /dev/null +++ b/vlib/encoding/xml/test/local/02_note_message/note.xml @@ -0,0 +1,6 @@ + + Tove + Jani + Reminder + Don't forget me this weekend! + diff --git a/vlib/encoding/xml/test/local/02_note_message/note_test.v b/vlib/encoding/xml/test/local/02_note_message/note_test.v new file mode 100644 index 0000000000..4d39988744 --- /dev/null +++ b/vlib/encoding/xml/test/local/02_note_message/note_test.v @@ -0,0 +1,41 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'note.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'note' + children: [ + xml.XMLNode{ + name: 'to' + children: [ + 'Tove', + ] + }, + xml.XMLNode{ + name: 'from' + children: [ + 'Jani', + ] + }, + xml.XMLNode{ + name: 'heading' + children: [ + 'Reminder', + ] + }, + xml.XMLNode{ + name: 'body' + children: [ + "Don't forget me this weekend!", + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! 
+ + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/03_cd_catalogue/cd_catalog.xml b/vlib/encoding/xml/test/local/03_cd_catalogue/cd_catalog.xml new file mode 100644 index 0000000000..7e0412df6f --- /dev/null +++ b/vlib/encoding/xml/test/local/03_cd_catalogue/cd_catalog.xml @@ -0,0 +1,34 @@ + + + Empire Burlesque + Bob Dylan + USA + Columbia + 10.90 + 1985 + + + Hide your heart + Bonnie Tyler + UK + CBS Records + 9.90 + 1988 + + + Greatest Hits + Dolly Parton + USA + RCA + 9.90 + 1982 + + + Still got the blues + Gary Moore + UK + Virgin records + 10.20 + 1990 + + diff --git a/vlib/encoding/xml/test/local/03_cd_catalogue/cd_test.v b/vlib/encoding/xml/test/local/03_cd_catalogue/cd_test.v new file mode 100644 index 0000000000..85a24e8369 --- /dev/null +++ b/vlib/encoding/xml/test/local/03_cd_catalogue/cd_test.v @@ -0,0 +1,181 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'cd_catalog.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'CATALOG' + children: [ + xml.XMLNode{ + name: 'CD' + children: [ + xml.XMLNode{ + name: 'TITLE' + children: [ + 'Empire Burlesque', + ] + }, + xml.XMLNode{ + name: 'ARTIST' + children: [ + 'Bob Dylan', + ] + }, + xml.XMLNode{ + name: 'COUNTRY' + children: [ + 'USA', + ] + }, + xml.XMLNode{ + name: 'COMPANY' + children: [ + 'Columbia', + ] + }, + xml.XMLNode{ + name: 'PRICE' + children: [ + '10.90', + ] + }, + xml.XMLNode{ + name: 'YEAR' + children: [ + '1985', + ] + }, + ] + }, + xml.XMLNode{ + name: 'CD' + children: [ + xml.XMLNode{ + name: 'TITLE' + children: [ + 'Hide your heart', + ] + }, + xml.XMLNode{ + name: 'ARTIST' + children: [ + 'Bonnie Tyler', + ] + }, + xml.XMLNode{ + name: 'COUNTRY' + children: [ + 'UK', + ] + }, + xml.XMLNode{ + name: 'COMPANY' + children: [ + 'CBS Records', + ] + }, + xml.XMLNode{ + name: 'PRICE' + children: [ + '9.90', + ] + }, + xml.XMLNode{ + 
name: 'YEAR' + children: [ + '1988', + ] + }, + ] + }, + xml.XMLNode{ + name: 'CD' + children: [ + xml.XMLNode{ + name: 'TITLE' + children: [ + 'Greatest Hits', + ] + }, + xml.XMLNode{ + name: 'ARTIST' + children: [ + 'Dolly Parton', + ] + }, + xml.XMLNode{ + name: 'COUNTRY' + children: [ + 'USA', + ] + }, + xml.XMLNode{ + name: 'COMPANY' + children: [ + 'RCA', + ] + }, + xml.XMLNode{ + name: 'PRICE' + children: [ + '9.90', + ] + }, + xml.XMLNode{ + name: 'YEAR' + children: [ + '1982', + ] + }, + ] + }, + xml.XMLNode{ + name: 'CD' + children: [ + xml.XMLNode{ + name: 'TITLE' + children: [ + 'Still got the blues', + ] + }, + xml.XMLNode{ + name: 'ARTIST' + children: [ + 'Gary Moore', + ] + }, + xml.XMLNode{ + name: 'COUNTRY' + children: [ + 'UK', + ] + }, + xml.XMLNode{ + name: 'COMPANY' + children: [ + 'Virgin records', + ] + }, + xml.XMLNode{ + name: 'PRICE' + children: [ + '10.20', + ] + }, + xml.XMLNode{ + name: 'YEAR' + children: [ + '1990', + ] + }, + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/04_empty_file/empty.xml b/vlib/encoding/xml/test/local/04_empty_file/empty.xml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vlib/encoding/xml/test/local/04_empty_file/expected_error.txt b/vlib/encoding/xml/test/local/04_empty_file/expected_error.txt new file mode 100644 index 0000000000..3aa0a89efc --- /dev/null +++ b/vlib/encoding/xml/test/local/04_empty_file/expected_error.txt @@ -0,0 +1 @@ +XML document is empty. diff --git a/vlib/encoding/xml/test/local/05_single_element/root.xml b/vlib/encoding/xml/test/local/05_single_element/root.xml new file mode 100644 index 0000000000..806295f2d7 --- /dev/null +++ b/vlib/encoding/xml/test/local/05_single_element/root.xml @@ -0,0 +1 @@ +Single root element. 
diff --git a/vlib/encoding/xml/test/local/05_single_element/root_test.v b/vlib/encoding/xml/test/local/05_single_element/root_test.v new file mode 100644 index 0000000000..3043bb8c73 --- /dev/null +++ b/vlib/encoding/xml/test/local/05_single_element/root_test.v @@ -0,0 +1,18 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'root.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'sample' + children: [ + 'Single root element.', + ] + } + } + actual := xml.XMLDocument.from_file(path)! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/06_nested_elements/nested.xml b/vlib/encoding/xml/test/local/06_nested_elements/nested.xml new file mode 100644 index 0000000000..a6f6a64e37 --- /dev/null +++ b/vlib/encoding/xml/test/local/06_nested_elements/nested.xml @@ -0,0 +1,14 @@ + + + + + Deeply nested content. + + + + + + Less deeply nested content. + + + diff --git a/vlib/encoding/xml/test/local/06_nested_elements/nested_test.v b/vlib/encoding/xml/test/local/06_nested_elements/nested_test.v new file mode 100644 index 0000000000..6a6d9757dd --- /dev/null +++ b/vlib/encoding/xml/test/local/06_nested_elements/nested_test.v @@ -0,0 +1,44 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'nested.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'level1' + children: [ + xml.XMLNode{ + name: 'level2' + children: [ + xml.XMLNode{ + name: 'level3' + children: [ + xml.XMLNode{ + name: 'level4' + children: [ + 'Deeply nested content.', + ] + }, + ] + }, + ] + }, + xml.XMLNode{ + name: 'level2' + children: [ + xml.XMLNode{ + name: 'level3' + children: [ + 'Less deeply nested content.', + ] + }, + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! 
+ + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/07_mixed_contents/mixed.xml b/vlib/encoding/xml/test/local/07_mixed_contents/mixed.xml new file mode 100644 index 0000000000..d874066aa3 --- /dev/null +++ b/vlib/encoding/xml/test/local/07_mixed_contents/mixed.xml @@ -0,0 +1,5 @@ + + Dear Mr. John Smith. + Your order 1032 + will be shipped on 2001-07-13. + diff --git a/vlib/encoding/xml/test/local/07_mixed_contents/mixed_test.v b/vlib/encoding/xml/test/local/07_mixed_contents/mixed_test.v new file mode 100644 index 0000000000..7032e1afbe --- /dev/null +++ b/vlib/encoding/xml/test/local/07_mixed_contents/mixed_test.v @@ -0,0 +1,33 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'mixed.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'letter' + children: [ + 'Dear Mr.', + xml.XMLNode{ + name: 'name' + children: ['John Smith'] + }, + '.\n Your order', + xml.XMLNode{ + name: 'orderid' + children: ['1032'] + }, + 'will be shipped on', + xml.XMLNode{ + name: 'shipdate' + children: ['2001-07-13'] + }, + '.', + ] + } + } + actual := xml.XMLDocument.from_file(path)! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/08_comments/comment.xml b/vlib/encoding/xml/test/local/08_comments/comment.xml new file mode 100644 index 0000000000..661c0de625 --- /dev/null +++ b/vlib/encoding/xml/test/local/08_comments/comment.xml @@ -0,0 +1,12 @@ + + +
+ + Jones + + ABSystems + + + (046) 1233-44778 + +
diff --git a/vlib/encoding/xml/test/local/08_comments/comment_test.v b/vlib/encoding/xml/test/local/08_comments/comment_test.v new file mode 100644 index 0000000000..749ea1f390 --- /dev/null +++ b/vlib/encoding/xml/test/local/08_comments/comment_test.v @@ -0,0 +1,42 @@ +import os +import encoding.xml + +fn test_valid_parsing() ! { + path := os.join_path(os.dir(@FILE), 'comment.xml') + + expected := xml.XMLDocument{ + comments: [ + xml.XMLComment{ + text: ' Employee Information' + }, + ] + root: xml.XMLNode{ + name: 'address' + children: [ + xml.XMLComment{ + text: ' Full or first name ' + }, + xml.XMLNode{ + name: 'name' + children: ['Jones'] + }, + xml.XMLComment{ + text: ' Registered name of the company -> ' + }, + xml.XMLNode{ + name: 'company' + children: ['ABSystems'] + }, + xml.XMLNode{ + name: 'phone' + children: [xml.XMLComment{ + text: ' Phone with country code -) ' + }, '(046) 1233-44778'] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/09_malformed/expected_error.txt b/vlib/encoding/xml/test/local/09_malformed/expected_error.txt new file mode 100644 index 0000000000..5c89ad324d --- /dev/null +++ b/vlib/encoding/xml/test/local/09_malformed/expected_error.txt @@ -0,0 +1 @@ +Malformed XML. Found "<" in attribute string: "Sample diff --git a/vlib/encoding/xml/test/local/10_missing_tag/expected_error.txt b/vlib/encoding/xml/test/local/10_missing_tag/expected_error.txt new file mode 100644 index 0000000000..bd4daa1058 --- /dev/null +++ b/vlib/encoding/xml/test/local/10_missing_tag/expected_error.txt @@ -0,0 +1 @@ +XML node not closed. 
diff --git a/vlib/encoding/xml/test/local/10_missing_tag/malformed.xml b/vlib/encoding/xml/test/local/10_missing_tag/malformed.xml new file mode 100644 index 0000000000..c10a92764a --- /dev/null +++ b/vlib/encoding/xml/test/local/10_missing_tag/malformed.xml @@ -0,0 +1,6 @@ + + + + Hello World + + diff --git a/vlib/encoding/xml/test/local/11_cdata_content/cdata.xml b/vlib/encoding/xml/test/local/11_cdata_content/cdata.xml new file mode 100644 index 0000000000..440641ec4d --- /dev/null +++ b/vlib/encoding/xml/test/local/11_cdata_content/cdata.xml @@ -0,0 +1,4 @@ + + This is <b>bold</b> + bold]]> + diff --git a/vlib/encoding/xml/test/local/11_cdata_content/cdata_test.v b/vlib/encoding/xml/test/local/11_cdata_content/cdata_test.v new file mode 100644 index 0000000000..069488d4bf --- /dev/null +++ b/vlib/encoding/xml/test/local/11_cdata_content/cdata_test.v @@ -0,0 +1,29 @@ +module main + +import os +import encoding.xml + +fn test_valid_parsing() { + path := os.join_path(os.dir(@FILE), 'cdata.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'sample' + children: [ + xml.XMLNode{ + name: 'html' + children: ['This is <b>bold</b>'] + }, + xml.XMLNode{ + name: 'html' + children: [xml.XMLCData{ + text: 'This is bold' + }] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! 
+ + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/12_doctype_entity/entity.xml b/vlib/encoding/xml/test/local/12_doctype_entity/entity.xml new file mode 100644 index 0000000000..d2273a8849 --- /dev/null +++ b/vlib/encoding/xml/test/local/12_doctype_entity/entity.xml @@ -0,0 +1,7 @@ + + +]> + + &warning; + diff --git a/vlib/encoding/xml/test/local/12_doctype_entity/spec_entity_test.v b/vlib/encoding/xml/test/local/12_doctype_entity/spec_entity_test.v new file mode 100644 index 0000000000..1327040bd7 --- /dev/null +++ b/vlib/encoding/xml/test/local/12_doctype_entity/spec_entity_test.v @@ -0,0 +1,41 @@ +module main + +import os +import encoding.xml + +fn test_valid_parsing() { + path := os.join_path(os.dir(@FILE), 'entity.xml') + + mut reverse_entities := xml.default_entities_reverse.clone() + reverse_entities['Warning: Something bad happened... please refresh and try again.'] = 'warning' + + expected := xml.XMLDocument{ + parsed_reverse_entities: reverse_entities + doctype: xml.DocumentType{ + name: 'body' + dtd: xml.DocumentTypeDefinition{ + name: '' + list: [ + xml.DTDEntity{ + name: 'warning' + value: 'Warning: Something bad happened... please refresh and try again.' + }, + ] + } + } + root: xml.XMLNode{ + name: 'body' + children: [ + xml.XMLNode{ + name: 'message' + children: [ + 'Warning: Something bad happened... please refresh and try again.', + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)!.validate()! 
+ + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/13_doctype_element/doctype_test.v b/vlib/encoding/xml/test/local/13_doctype_element/doctype_test.v new file mode 100644 index 0000000000..22f952e977 --- /dev/null +++ b/vlib/encoding/xml/test/local/13_doctype_element/doctype_test.v @@ -0,0 +1,71 @@ +module main + +import os +import encoding.xml + +fn test_valid_parsing() { + path := os.join_path(os.dir(@FILE), 'element.xml') + + expected := xml.XMLDocument{ + doctype: xml.DocumentType{ + name: 'note' + dtd: xml.DocumentTypeDefinition{ + name: '' + list: [ + xml.DTDElement{ + name: 'note' + definition: ['to', 'from', 'heading', 'body'] + }, + xml.DTDElement{ + name: 'to' + definition: ['#PCDATA'] + }, + xml.DTDElement{ + name: 'from' + definition: ['#PCDATA'] + }, + xml.DTDElement{ + name: 'heading' + definition: ['#PCDATA'] + }, + xml.DTDElement{ + name: 'body' + definition: ['#PCDATA'] + }, + ] + } + } + root: xml.XMLNode{ + name: 'note' + children: [ + xml.XMLNode{ + name: 'to' + children: [ + 'Tove', + ] + }, + xml.XMLNode{ + name: 'from' + children: [ + 'Jani', + ] + }, + xml.XMLNode{ + name: 'heading' + children: [ + 'Reminder', + ] + }, + xml.XMLNode{ + name: 'body' + children: [ + "Don't forget me this weekend!", + ] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)!.validate()! + + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/13_doctype_element/element.xml b/vlib/encoding/xml/test/local/13_doctype_element/element.xml new file mode 100644 index 0000000000..943eba6fe7 --- /dev/null +++ b/vlib/encoding/xml/test/local/13_doctype_element/element.xml @@ -0,0 +1,14 @@ + + + + + + +]> + + Tove + Jani + Reminder + Don't forget me this weekend! 
+ diff --git a/vlib/encoding/xml/test/local/14_attributes/attributes.xml b/vlib/encoding/xml/test/local/14_attributes/attributes.xml new file mode 100644 index 0000000000..7d1cf06e25 --- /dev/null +++ b/vlib/encoding/xml/test/local/14_attributes/attributes.xml @@ -0,0 +1,7 @@ + + + Learning XML + Erik T. Ray + 2003 + 39.95 + diff --git a/vlib/encoding/xml/test/local/14_attributes/attributes_test.v b/vlib/encoding/xml/test/local/14_attributes/attributes_test.v new file mode 100644 index 0000000000..721f8ce059 --- /dev/null +++ b/vlib/encoding/xml/test/local/14_attributes/attributes_test.v @@ -0,0 +1,45 @@ +module main + +import os +import encoding.xml + +fn test_valid_parsing() { + path := os.join_path(os.dir(@FILE), 'attributes.xml') + + expected := xml.XMLDocument{ + root: xml.XMLNode{ + name: 'book' + attributes: { + 'category': 'web' + } + children: [ + xml.XMLNode{ + name: 'title' + attributes: { + 'lang': 'en' + 'code:type': 'const char*' + } + children: ['Learning XML'] + }, + xml.XMLNode{ + name: 'author' + attributes: { + 'attr': ' surrounding spaces ' + } + children: ['Erik T. Ray'] + }, + xml.XMLNode{ + name: 'year' + children: ['2003'] + }, + xml.XMLNode{ + name: 'price' + children: ['39.95'] + }, + ] + } + } + actual := xml.XMLDocument.from_file(path)! 
+ + assert expected == actual, 'Parsed XML document should be equal to expected XML document' +} diff --git a/vlib/encoding/xml/test/local/15_incomplete_entity_1/entity.xml b/vlib/encoding/xml/test/local/15_incomplete_entity_1/entity.xml new file mode 100644 index 0000000000..aa0548d102 --- /dev/null +++ b/vlib/encoding/xml/test/local/15_incomplete_entity_1/entity.xml @@ -0,0 +1,6 @@ + + +]> + + diff --git a/vlib/encoding/xml/test/local/15_incomplete_entity_1/expected_error.txt b/vlib/encoding/xml/test/local/15_incomplete_entity_1/expected_error.txt new file mode 100644 index 0000000000..16440af110 --- /dev/null +++ b/vlib/encoding/xml/test/local/15_incomplete_entity_1/expected_error.txt @@ -0,0 +1 @@ +Entity is missing name. diff --git a/vlib/encoding/xml/test/local/16_incomplete_entity_2/entity.xml b/vlib/encoding/xml/test/local/16_incomplete_entity_2/entity.xml new file mode 100644 index 0000000000..99e70e8f82 --- /dev/null +++ b/vlib/encoding/xml/test/local/16_incomplete_entity_2/entity.xml @@ -0,0 +1,7 @@ + + +]> + + &missing; + diff --git a/vlib/encoding/xml/test/local/16_incomplete_entity_2/expected_error.txt b/vlib/encoding/xml/test/local/16_incomplete_entity_2/expected_error.txt new file mode 100644 index 0000000000..c233fb573e --- /dev/null +++ b/vlib/encoding/xml/test/local/16_incomplete_entity_2/expected_error.txt @@ -0,0 +1 @@ +Entity is missing value. 
diff --git a/vlib/encoding/xml/test/local/17_incomplete_element_1/element.xml b/vlib/encoding/xml/test/local/17_incomplete_element_1/element.xml new file mode 100644 index 0000000000..278d57c1ef --- /dev/null +++ b/vlib/encoding/xml/test/local/17_incomplete_element_1/element.xml @@ -0,0 +1,6 @@ + + +]> + + diff --git a/vlib/encoding/xml/test/local/17_incomplete_element_1/expected_error.txt b/vlib/encoding/xml/test/local/17_incomplete_element_1/expected_error.txt new file mode 100644 index 0000000000..1b0e883665 --- /dev/null +++ b/vlib/encoding/xml/test/local/17_incomplete_element_1/expected_error.txt @@ -0,0 +1 @@ +Element is missing name. diff --git a/vlib/encoding/xml/test/local/18_incomplete_element_2/element.xml b/vlib/encoding/xml/test/local/18_incomplete_element_2/element.xml new file mode 100644 index 0000000000..4dea5558b9 --- /dev/null +++ b/vlib/encoding/xml/test/local/18_incomplete_element_2/element.xml @@ -0,0 +1,6 @@ + + +]> + + diff --git a/vlib/encoding/xml/test/local/18_incomplete_element_2/expected_error.txt b/vlib/encoding/xml/test/local/18_incomplete_element_2/expected_error.txt new file mode 100644 index 0000000000..a99dee06c8 --- /dev/null +++ b/vlib/encoding/xml/test/local/18_incomplete_element_2/expected_error.txt @@ -0,0 +1 @@ +Invalid element definition: invalid diff --git a/vlib/encoding/xml/test/spec_test.v b/vlib/encoding/xml/test/spec_test.v new file mode 100644 index 0000000000..a4f5c5a785 --- /dev/null +++ b/vlib/encoding/xml/test/spec_test.v @@ -0,0 +1,25 @@ +module main + +import os +import encoding.xml + +// All the XML files in the spec directory obtained recursively +const spec_files = os.walk_ext(os.join_path(os.dir(@FILE), 'local'), 'xml') + +fn test_can_parse_all_files() ! { + assert spec_files.len > 0, 'No XML files found in the spec directory' + for file in spec_files { + doc := xml.XMLDocument.from_file(file) or { + // Parsing failed. Check if this was an expected error. 
+ parent := os.dir(file) + error_file := os.join_path(parent, 'expected_error.txt') + error_text := os.read_file(error_file) or { + // No expected error. Fail the test. + return error('Failed to parse XML file: ' + file) + } + // Check if the error message matches the expected error. + assert err.msg().trim_space() == error_text.trim_space() + continue + } + } +} diff --git a/vlib/encoding/xml/types.v b/vlib/encoding/xml/types.v new file mode 100644 index 0000000000..f61b7c9531 --- /dev/null +++ b/vlib/encoding/xml/types.v @@ -0,0 +1,71 @@ +module xml + +pub type XMLNodeContents = XMLCData | XMLComment | XMLNode | string + +pub struct XMLCData { +pub: + text string [required] +} + +pub struct XMLComment { +pub: + text string [required] +} + +// XMLNode represents a single XML node. It contains the node name, +// a map of attributes, and a list of children. The children can be +// other XML nodes, CDATA, plain text, or comments. +pub struct XMLNode { +pub: + name string [required] + attributes map[string]string + children []XMLNodeContents +} + +// XMLDocument is the struct that represents a single XML document. +// It contains the prolog and the single root node. The prolog struct +// is embedded into the XMLDocument struct, so that the prolog fields +// are accessible directly from the this struct. +// Public prolog fields include version, enccoding, comments preceding +// the root node, and the document type definition. 
+pub struct XMLDocument { + Prolog +pub: + root XMLNode [required] +} + +pub type DTDListItem = DTDElement | DTDEntity + +pub struct DTDEntity { + name string [required] + value string [required] +} + +pub struct DTDElement { + name string [required] + definition []string [required] +} + +pub struct DocumentTypeDefinition { + name string + list []DTDListItem +} + +pub struct DocumentType { + name string [required] + dtd DTDInfo +} + +type DTDInfo = DocumentTypeDefinition | string + +struct Prolog { + parsed_reverse_entities map[string]string = default_entities_reverse.clone() +pub: + version string = '1.0' + encoding string = 'UTF-8' + doctype DocumentType = DocumentType{ + name: '' + dtd: '' + } + comments []XMLComment +} diff --git a/vlib/encoding/xml/validation.v b/vlib/encoding/xml/validation.v new file mode 100644 index 0000000000..f20d33be1c --- /dev/null +++ b/vlib/encoding/xml/validation.v @@ -0,0 +1,96 @@ +module xml + +fn (node XMLNode) validate(elements map[string]DTDElement, entities map[string]string) !XMLNode { + mut children := []XMLNodeContents{cap: node.children.len} + + valid_elements := elements[node.name].definition + mut validate_node_children := node.name in elements + + // Check if the node will match everything + if valid_elements.len == 1 && valid_elements[0] == '#PCDATA' { + validate_node_children = false + } + + for child in node.children { + match child { + XMLNode { + if validate_node_children { + name := child.name + if name !in valid_elements { + return error('Invalid child element ${name} for ${node.name}') + } + } + children << child.validate(elements, entities)! + } + string { + children << unescape_text(child, entities: entities)! + } + else { + // Ignore other nodes + children << child + } + } + } + + return XMLNode{ + name: node.name + attributes: node.attributes + children: children + } +} + +// validate checks the document is well-formed and valid. 
It returns a new +// document with the parsed entities expanded when validation is successful. +// Otherwise it returns an error. +pub fn (doc XMLDocument) validate() !XMLDocument { + // The document is well-formed because we were able to parse it properly. + match doc.doctype.dtd { + DocumentTypeDefinition { + // Store the element and entity definitions + mut elements := map[string]DTDElement{} + mut entities := default_entities.clone() + mut reverse_entities := default_entities_reverse.clone() + + for item in doc.doctype.dtd.list { + match item { + DTDElement { + name := item.name + if name in elements { + return error('Duplicate element definition for ${name}') + } + elements[name] = item + } + DTDEntity { + name := item.name + if name in entities { + return error('Duplicate entity definition for ${name}') + } + entities[name] = item.value + reverse_entities[item.value] = name + } + } + } + + // Now validate the document against the elements and entities. + new_root := doc.root.validate(elements, entities)! + + // Check the DOCTYPE name matches the root name + if doc.doctype.name.len > 0 && doc.doctype.name != new_root.name { + return error('Root element ${new_root.name} does not match DOCTYPE ${doc.doctype.name}') + } + + return XMLDocument{ + version: doc.version + encoding: doc.encoding + doctype: doc.doctype + comments: doc.comments + root: new_root + parsed_reverse_entities: reverse_entities + } + } + string { + // TODO: Validate the document against the DTD string. + return doc + } + } +}