mirror of
https://github.com/vlang/v.git
synced 2025-09-13 22:42:26 +03:00
vlib: add an encoding.xml
module with parser, validation, entity encoding, unit tests (#19708)
This commit is contained in:
parent
01022e918e
commit
35558df96c
48 changed files with 2004 additions and 1 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -130,4 +130,9 @@ vls.log
|
|||
wasm.v
|
||||
TAGS
|
||||
tags
|
||||
|
||||
# ignore large GTK *.gir files
|
||||
Gtk-4.0.gir
|
||||
*.gir
|
||||
|
||||
vlib/builtin/js/*.js
|
44
vlib/encoding/xml/README.md
Normal file
44
vlib/encoding/xml/README.md
Normal file
|
@ -0,0 +1,44 @@
|
|||
## Description
|
||||
|
||||
`xml` is a module to parse XML documents into a tree structure. It also supports
|
||||
validation of XML documents against a DTD.
|
||||
|
||||
Note that this is not a streaming XML parser. It reads the entire document into
|
||||
memory and then parses it. This is not a problem for small documents, but it
|
||||
might be a problem for extremely large documents (several hundred megabytes or more).
|
||||
|
||||
## Usage
|
||||
|
||||
### Parsing XML Files
|
||||
|
||||
There are three different ways to parse an XML Document:
|
||||
|
||||
1. Pass the entire XML document as a string to `XMLDocument.from_string`.
|
||||
2. Specify a file path to `XMLDocument.from_file`.
|
||||
3. Use a source that implements `io.Reader` and pass it to `XMLDocument.from_reader`.
|
||||
|
||||
```v
|
||||
import encoding.xml
|
||||
|
||||
//...
|
||||
doc := xml.XMLDocument.from_file('test/sample.xml')!
|
||||
```
|
||||
|
||||
### Validating XML Documents
|
||||
|
||||
Simply call `validate` on the parsed XML document.
|
||||
|
||||
### Querying
|
||||
|
||||
Check the `get_element...` methods defined on the XMLDocument struct.
|
||||
|
||||
### Escaping and Un-escaping XML Entities
|
||||
|
||||
When the `validate` method is called, the XML document is parsed and all text
|
||||
nodes are un-escaped. This means that the text nodes will contain the actual
|
||||
text and not the escaped version of the text.
|
||||
|
||||
When the XML document is serialized (using `str` or `pretty_str`), all text nodes are escaped.
|
||||
|
||||
The escaping and un-escaping can also be done manually using the `escape_text` and
|
||||
`unescape_text` functions.
|
148
vlib/encoding/xml/encoding.v
Normal file
148
vlib/encoding/xml/encoding.v
Normal file
|
@ -0,0 +1,148 @@
|
|||
module xml
|
||||
|
||||
import strings
|
||||
|
||||
// pretty_str serializes the node and everything below it as indented XML text.
// `original_indent` is the string used for a single indentation level, `depth` is the
// number of levels this node is nested, and `reverse_entities` is handed to `escape_text`
// when the text children of the node are written.
pub fn (node XMLNode) pretty_str(original_indent string, depth int, reverse_entities map[string]string) string {
	// Indentation of this node: one copy of `original_indent` per level of depth.
	mut indent_builder := strings.new_builder(original_indent.len * depth)
	for _ in 0 .. depth {
		indent_builder.write_string(original_indent)
	}
	indent := indent_builder.str()
	// Text, comment and CDATA children are written one level deeper than the node itself.
	child_indent := indent + original_indent

	mut out := strings.new_builder(1024)

	// Opening tag. Attribute values are written exactly as they are stored.
	out.write_string('${indent}<${node.name}')
	for key, value in node.attributes {
		out.write_string(' ${key}="${value}"')
	}
	out.write_string('>\n')

	// Every child ends up on a line of its own.
	for child in node.children {
		match child {
			string {
				out.write_string(child_indent)
				out.write_string(escape_text(child, reverse_entities: reverse_entities))
			}
			XMLNode {
				// Nested elements compute their own indentation from the depth.
				out.write_string(child.pretty_str(original_indent, depth + 1, reverse_entities))
			}
			XMLComment {
				out.write_string('${child_indent}<!--${child.text}-->')
			}
			XMLCData {
				out.write_string('${child_indent}<![CDATA[${child.text}]]>')
			}
		}
		out.write_u8(`\n`)
	}

	out.write_string('${indent}</${node.name}>')
	return out.str()
}
|
||||
|
||||
// pretty_str renders the DTD items as a bracketed list (`[` ... `]`) with one declaration
// per line, each prefixed with `indent`. An empty list yields an empty string.
fn (list []DTDListItem) pretty_str(indent string) string {
	if list.len == 0 {
		return ''
	}

	mut builder := strings.new_builder(1024)
	builder.write_string('[\n')

	for item in list {
		match item {
			DTDEntity {
				builder.write_string('${indent}<!ENTITY ${item.name} "${item.value}">')
			}
			DTDElement {
				// `definition` holds the child names that parse_element split on ",".
				// Join them back into DTD syntax; interpolating the array directly would
				// emit a V array literal such as ['to', 'from'].
				children := item.definition.join(',')
				builder.write_string('${indent}<!ELEMENT ${item.name} (${children})>')
			}
		}
		builder.write_u8(`\n`)
	}
	builder.write_u8(`]`)
	return builder.str()
}
|
||||
|
||||
// pretty_str renders the DOCTYPE declaration of a document. `indent` is forwarded to the
// DTD item list. An empty string is returned when there is nothing to declare.
fn (doctype DocumentType) pretty_str(indent string) string {
	match doctype.dtd {
		string {
			// A string DTD is written as a SYSTEM identifier.
			system_id := doctype.dtd
			if system_id.len == 0 {
				return ''
			}
			return '<!DOCTYPE ${doctype.name} SYSTEM "${system_id}">'
		}
		DocumentTypeDefinition {
			// An inline definition is written as a bracketed list of declarations.
			items := doctype.dtd.list
			if items.len == 0 {
				return ''
			}
			rendered_items := items.pretty_str(indent)
			return '<!DOCTYPE ${doctype.name} ${rendered_items}>\n'
		}
	}
}
|
||||
|
||||
// pretty_str serializes the whole document: the XML declaration, the DOCTYPE, the
// document-level comments and finally the root node. `indent` is the string used for
// one level of indentation.
pub fn (doc XMLDocument) pretty_str(indent string) string {
	mut out := strings.new_builder(1024)

	// XML declaration and DOCTYPE each get a line of their own; the DOCTYPE line is
	// left empty when the document has none.
	out.write_string('<?xml version="${doc.version}" encoding="${doc.encoding}"?>')
	out.write_u8(`\n`)
	out.write_string(doc.doctype.pretty_str(indent))
	out.write_u8(`\n`)

	// Comments collected from the prolog, one per line.
	for comment in doc.comments {
		out.write_string('<!--${comment.text}-->\n')
	}

	out.write_string(doc.root.pretty_str(indent, 0, doc.parsed_reverse_entities))
	return out.str()
}
|
||||
|
||||
// str returns a string representation of the XML document. It uses a 2-space indentation
// to pretty-print the document.
pub fn (doc XMLDocument) str() string {
	// Two spaces per level, as documented above.
	return doc.pretty_str('  ')
}
|
79
vlib/encoding/xml/entity.v
Normal file
79
vlib/encoding/xml/entity.v
Normal file
|
@ -0,0 +1,79 @@
|
|||
module xml
|
||||
|
||||
import strings
|
||||
|
||||
// default_entities maps the predefined XML entity names to the characters they stand for.
// It is the default lookup table of `unescape_text`.
pub const default_entities = {
	'lt':   '<'
	'gt':   '>'
	'amp':  '&'
	'apos': "'"
	'quot': '"'
}
|
||||
|
||||
// default_entities_reverse maps characters to the names of the predefined XML entities
// that represent them. It is the default lookup table of `escape_text`.
pub const default_entities_reverse = {
	'<': 'lt'
	'>': 'gt'
	'&': 'amp'
	"'": 'apos'
	'"': 'quot'
}
|
||||
|
||||
// EscapeConfig carries the optional settings of `escape_text`.
[params]
pub struct EscapeConfig {
	// reverse_entities maps each piece of raw text to the entity name that replaces it.
	reverse_entities map[string]string = xml.default_entities_reverse
}
|
||||
|
||||
// escape_text returns `content` with every key of `config.reverse_entities` replaced by
// the matching entity reference (`&name;`). The table defaults to
// default_entities_reverse and can be overridden through `config`.
pub fn escape_text(content string, config EscapeConfig) string {
	// Build the flat [from, to, from, to, ...] list that is passed to replace_each.
	mut replacements := []string{cap: 2 * config.reverse_entities.len}
	for raw, entity_name in config.reverse_entities {
		replacements << raw
		replacements << '&${entity_name};'
	}
	return content.replace_each(replacements)
}
|
||||
|
||||
// UnescapeConfig carries the optional settings of `unescape_text`.
[params]
pub struct UnescapeConfig {
	// entities maps each entity name to the text it expands to.
	entities map[string]string = xml.default_entities
}
|
||||
|
||||
// unescape_text returns `content` with every entity reference (`&name;`) replaced by the
// text registered for `name` in `config.entities`. The table defaults to default_entities
// and can be overridden through `config`.
// An error is returned when a reference is not terminated by `;` or names an unknown entity.
pub fn unescape_text(content string, config UnescapeConfig) !string {
	runes := content.runes()
	mut out := strings.new_builder(content.len)
	mut pos := 0
	for pos < runes.len {
		current := runes[pos]
		if current != `&` {
			out.write_rune(current)
			pos++
			continue
		}

		// Locate the `;` that terminates the reference starting at `pos`.
		mut end := pos + 1
		for end < runes.len && runes[end] != `;` {
			end++
		}
		if end == runes.len {
			return error('Unexpected end of string while parsing entity.')
		}

		entity := runes[pos + 1..end].string()
		replacement := config.entities[entity] or { return error('Unknown entity: ' + entity) }
		out.write_string(replacement)
		// Resume right after the terminating `;`.
		pos = end + 1
	}
	return out.str()
}
|
35
vlib/encoding/xml/entity_test.v
Normal file
35
vlib/encoding/xml/entity_test.v
Normal file
|
@ -0,0 +1,35 @@
|
|||
module main
|
||||
|
||||
import encoding.xml
|
||||
|
||||
// test_escape checks that escape_text turns the predefined special characters into entity
// references, leaves other characters alone, and honours a custom reverse entity table.
fn test_escape() {
	assert xml.escape_text('Normal string') == 'Normal string'
	assert xml.escape_text('12 < 34') == '12 &lt; 34'
	assert xml.escape_text('12 > 34') == '12 &gt; 34'
	assert xml.escape_text('12 & 34') == '12 &amp; 34'
	assert xml.escape_text('He said, "Very well, let us proceed."') == 'He said, &quot;Very well, let us proceed.&quot;'
	assert xml.escape_text("He said, 'Very well, let us proceed.'") == 'He said, &apos;Very well, let us proceed.&apos;'

	// The copyright sign is not part of the default table, so it is left untouched.
	assert xml.escape_text('Do not escape ©.') == 'Do not escape ©.'

	mut reverse_entities := xml.default_entities_reverse.clone()
	reverse_entities['©'] = 'copy'
	assert xml.escape_text('Do escape ©.', reverse_entities: reverse_entities) == 'Do escape &copy;.'
}
|
||||
|
||||
// test_unescape checks that unescape_text expands the predefined entity references,
// reports unknown entities, and honours a custom entity table.
fn test_unescape() ! {
	assert xml.unescape_text('Normal string')! == 'Normal string'
	assert xml.unescape_text('12 &lt; 34')! == '12 < 34'
	assert xml.unescape_text('12 &gt; 34')! == '12 > 34'
	assert xml.unescape_text('12 &amp; 34')! == '12 & 34'
	assert xml.unescape_text('He said, &quot;Very well, let us proceed.&quot;')! == 'He said, "Very well, let us proceed."'
	assert xml.unescape_text('He said, &apos;Very well, let us proceed.&apos;')! == "He said, 'Very well, let us proceed.'"

	xml.unescape_text('12 &invalid; 34') or { assert err.msg() == 'Unknown entity: invalid' }

	// `copy` is not part of the default table, so this reference must be rejected.
	xml.unescape_text('Do not unescape &copy;') or { assert err.msg() == 'Unknown entity: copy' }

	mut entities := xml.default_entities.clone()
	entities['copy'] = '©'
	assert xml.unescape_text('Do unescape &copy;.', entities: entities)! == 'Do unescape ©.'
}
|
604
vlib/encoding/xml/parser.v
Normal file
604
vlib/encoding/xml/parser.v
Normal file
|
@ -0,0 +1,604 @@
|
|||
module xml
|
||||
|
||||
import io
|
||||
import os
|
||||
import strings
|
||||
|
||||
// Attributes assumed for an XML declaration that does not list any.
const default_prolog_attributes = {
	'version':  '1.0'
	'encoding': 'UTF-8'
}

// Initial capacity of the string builders used while parsing.
const default_string_builder_cap = 32

// Lengths of the keywords that open DTD declarations; used to skip past them.
const element_len = '<!ELEMENT'.len
const entity_len = '<!ENTITY'.len

// Byte sequences expected after a partially consumed marker:
// "<!D" + doctype_chars, "<!" + double_dash, "<!" + c_tag, "<![C" + data_chars.
const doctype_chars = 'OCTYPE'.bytes()
const double_dash = '--'.bytes()
const c_tag = '[C'.bytes()
const data_chars = 'DATA'.bytes()
|
||||
|
||||
// Helper types to assist in parsing
|
||||
|
||||
// TextSpan marks a region of a string as the half-open index range `start..end`.
struct TextSpan {
mut:
	start int // index of the first byte of the span
	end   int // index just past the last byte of the span
}
|
||||
|
||||
// AttributeParserState is the position of parse_attributes within a `key="value"` pair.
enum AttributeParserState {
	key // reading an attribute name, up to the "="
	eq // the "=" was read, the opening quote of the value is expected
	value // inside the quoted value
}
|
||||
|
||||
// parse_attributes parses the attribute part of a tag (everything after the tag name)
// into a map of attribute name to raw attribute value.
//
// Values may be enclosed in single or double quotes. A value ends at the same quote
// character that opened it, so the other quote character may appear inside the value.
// Text that is not followed by "=" (for example the trailing "/" of a self-closing tag)
// and a value whose closing quote is missing are ignored.
//
// An error is returned when the string contains "<", when "=" is directly followed by
// another "=", or when "=" is followed by anything other than a quote.
fn parse_attributes(attribute_contents string) !map[string]string {
	if attribute_contents.contains_u8(`<`) {
		return error('Malformed XML. Found "<" in attribute string: "${attribute_contents}"')
	}
	mut attributes := map[string]string{}

	mut state := AttributeParserState.key
	mut key_span, mut value_span := TextSpan{}, TextSpan{}
	// The quote character that opened the value currently being read.
	mut opening_quote := u8(0)

	for index, ch in attribute_contents {
		match state {
			.key {
				if ch == `=` {
					state = AttributeParserState.eq
				} else {
					// Surrounding whitespace is part of the span; it is trimmed on store.
					key_span.end++
				}
			}
			.eq {
				match ch {
					`=` {
						return error('Duplicate "=" in attribute string: "${attribute_contents}"')
					}
					`'`, `"` {
						state = AttributeParserState.value
						opening_quote = ch
						value_span.start = index + 1
					}
					else {
						return error('Invalid character in attribute string: "${attribute_contents}"')
					}
				}
			}
			.value {
				// Only the quote that opened the value can close it.
				if ch == opening_quote {
					state = AttributeParserState.key
					value_span.end = index
					attributes[attribute_contents[key_span.start..key_span.end].trim_space()] = attribute_contents[value_span.start..value_span.end]

					// The next key starts right after the closing quote.
					key_span.start = index + 1
					key_span.end = index + 1
				}
			}
		}
	}

	return attributes
}
|
||||
|
||||
// parse_comment reads the body of a comment from `reader` until the closing "-->".
// The callers in this file invoke it right after consuming "<!--".
// A "--" that is not directly followed by ">" is reported as an error.
fn parse_comment(mut reader io.Reader) !XMLComment {
	mut text := strings.new_builder(xml.default_string_builder_cap)
	mut byte_buf := [u8(0)]

	for {
		current := next_char(mut reader, mut byte_buf)!
		if current != `-` {
			text.write_u8(current)
			continue
		}

		// A dash: look at the next character to tell a lone dash from "--".
		following := next_char(mut reader, mut byte_buf)!
		if following != `-` {
			text.write_u8(current)
			text.write_u8(following)
			continue
		}

		// "--" must be the start of the closing "-->".
		if next_char(mut reader, mut byte_buf)! != `>` {
			return error('XML Comment not closed. Expected ">".')
		}
		break
	}

	return XMLComment{text.str()}
}
|
||||
|
||||
// CDATAParserState tracks how many consecutive "]" characters parse_cdata has just read,
// which is what it needs to recognise the closing "]]>".
enum CDATAParserState {
	normal // the previous character was not "]"
	single // exactly one "]" was just read
	double // two or more consecutive "]" were just read
}
|
||||
|
||||
// parse_cdata reads a CDATA section from `reader` up to and including the closing "]]>".
// The caller in this file invokes it right after consuming "<![CDATA", so the first
// character read here is expected to be the "[" that opens the payload.
// NOTE(review): that first character is dropped without being checked.
fn parse_cdata(mut reader io.Reader) !XMLCData {
	mut raw := strings.new_builder(xml.default_string_builder_cap)
	mut byte_buf := [u8(0)]
	mut state := CDATAParserState.normal

	for {
		ch := next_char(mut reader, mut byte_buf)!
		raw.write_u8(ch)

		// ">" directly after "]]" closes the section.
		if ch == `>` && state == .double {
			break
		}

		// Count consecutive "]" characters; any other character resets the count.
		state = if ch != `]` {
			CDATAParserState.normal
		} else if state == .normal {
			CDATAParserState.single
		} else {
			CDATAParserState.double
		}
	}

	contents := raw.str().trim_space()
	if !contents.ends_with(']]>') {
		return error('CDATA section not closed.')
	}
	// Strip the leading "[" and the trailing "]]>".
	payload := contents[1..contents.len - 3]
	return XMLCData{payload}
}
|
||||
|
||||
// parse_entity parses the `<!ENTITY name "value">` declaration at the start of `contents`.
// It returns the entity together with the text that follows the declaration.
// An error is returned when the declaration is not closed or lacks a name or a value.
fn parse_entity(contents string) !(DTDEntity, string) {
	// The declaration ends at the first ">" after its start.
	closing := contents.index('>') or { return error('Entity declaration not closed.') }
	declaration := contents[xml.entity_len..closing]

	// The name is the first space-delimited word of the declaration.
	without_leading_space := declaration.trim_left(' \t\n')
	name := without_leading_space.all_before(' ')
	if name.len == 0 {
		return error('Entity is missing name.')
	}

	// Everything after the name, without surrounding whitespace and quotes, is the value.
	after_name := declaration.all_after_first(name)
	value := after_name.trim_space().trim('"\'')
	if value.len == 0 {
		return error('Entity is missing value.')
	}

	// TODO: Add support for SYSTEM and PUBLIC entities

	remaining := contents[closing + 1..]
	return DTDEntity{name, value}, remaining
}
|
||||
|
||||
// parse_element parses the `<!ELEMENT name (child,child,...)>` declaration at the start of
// `contents`. It returns the element together with the text that follows the declaration.
//
// Only parenthesised child lists are supported as definitions. An error is returned when
// the declaration is not closed, the name is missing or contains an unsupported character,
// or the definition is not a parenthesised list.
fn parse_element(contents string) !(DTDElement, string) {
	// We find the nearest '>' to the start of the ELEMENT
	element_end := contents.index('>') or { return error('Element declaration not closed.') }
	element_contents := contents[xml.element_len..element_end].trim_left(' \t\n')

	mut name_span := TextSpan{}

	for ch in element_contents {
		match ch {
			` `, `\t`, `\n` {
				// Whitespace terminates the name.
				break
			}
			// Characters accepted in an element name:
			// 1. Lowercase alphabet - a-z
			// 2. Uppercase alphabet - A-Z
			// 3. Numbers - 0-9
			// 4. Underscore - _
			// 5. Colon - :
			// 6. Period - .
			// 7. Hyphen - -
			`a`...`z`, `A`...`Z`, `0`...`9`, `_`, `:`, `.`, `-` {
				name_span.end++
			}
			else {
				// `ch` is a byte; show the character itself rather than its numeric value.
				return error('Invalid character in element name: "${ch.ascii_str()}"')
			}
		}
	}

	name := element_contents[name_span.start..name_span.end].trim_left(' \t\n')
	if name.len == 0 {
		return error('Element is missing name.')
	}
	definition_string := element_contents.all_after_first(name).trim_space().trim('"\'')

	definition := if definition_string.starts_with('(') {
		// We have a list of possible children

		// Ensure that both ( and ) are present
		if !definition_string.ends_with(')') {
			return error('Element declaration not closed.')
		}

		definition_string.trim('()').split(',')
	} else {
		// Invalid definition
		return error('Invalid element definition: ${definition_string}')
	}

	// TODO: Add support for the EMPTY and ANY content models

	return DTDElement{name, definition}, contents[element_end + 1..]
}
|
||||
|
||||
// parse_doctype reads a DOCTYPE declaration from `reader` up to its closing ">".
// The caller in this file invokes it right after consuming "<!DOCTYPE".
//
// Three forms are understood:
// - `name [ ... ]`: an internal list of ENTITY and ELEMENT declarations, returned as a
//   DocumentTypeDefinition;
// - `name SYSTEM "uri"`: an external DTD, returned with the URI as a string DTD;
// - `name`: no DTD at all, returned with an empty string DTD.
// Anything else results in an error.
fn parse_doctype(mut reader io.Reader) !DocumentType {
	// We may have more < in the doctype so keep count
	mut depth := 1
	mut doctype_buffer := strings.new_builder(xml.default_string_builder_cap)
	mut local_buf := [u8(0)]
	for {
		ch := next_char(mut reader, mut local_buf)!
		doctype_buffer.write_u8(ch)
		match ch {
			`<` {
				depth++
			}
			`>` {
				depth--
				if depth == 0 {
					break
				}
			}
			else {}
		}
	}

	doctype_contents := doctype_buffer.str().trim_space()

	if !doctype_contents.contains('[') {
		// There is no internal declaration list. Without this branch the whole
		// declaration would be fed to the item loop below and rejected.
		declaration := doctype_contents.all_before_last('>').trim_space()
		parts := declaration.fields()
		if parts.len == 0 {
			return error('DOCTYPE is missing a name.')
		}
		doctype_name := parts[0]
		if parts.len == 1 {
			return DocumentType{
				name: doctype_name
				dtd: ''
			}
		}

		keyword := 'SYSTEM'
		external_id := declaration[doctype_name.len..].trim_space()
		if !external_id.starts_with(keyword) {
			// TODO: Add support for PUBLIC identifiers
			return error('Unsupported DOCTYPE declaration: ${declaration}')
		}
		system_id := external_id[keyword.len..].trim_space().trim('"\'')
		if system_id.len == 0 {
			return error('DOCTYPE is missing the SYSTEM identifier.')
		}
		return DocumentType{
			name: doctype_name
			dtd: system_id
		}
	}

	name := doctype_contents.all_before('[').trim_space()

	mut list_contents := doctype_contents.all_after('[').all_before(']').trim_space()
	mut items := []DTDListItem{}

	// Consume one declaration per iteration until the list is exhausted.
	for list_contents.len > 0 {
		if list_contents.starts_with('<!ENTITY') {
			entity, remaining := parse_entity(list_contents)!
			items << entity
			list_contents = remaining.trim_space()
		} else if list_contents.starts_with('<!ELEMENT') {
			element, remaining := parse_element(list_contents)!
			items << element
			list_contents = remaining.trim_space()
		} else {
			return error('Unknown DOCTYPE list item: ${list_contents}')
		}
	}

	return DocumentType{
		name: name
		dtd: DocumentTypeDefinition{
			list: items
		}
	}
}
|
||||
|
||||
// parse_prolog reads everything that precedes the root node: leading whitespace, the
// optional XML declaration (`<?xml ... ?>`), comments and an optional DOCTYPE.
//
// It returns the parsed prolog together with the first character of the root node's tag,
// i.e. the character that follows the root's "<", which has already been consumed.
// When the document does not start with an XML declaration, an empty Prolog is returned
// right away together with the character that followed the first "<".
fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
	// Skip leading whitespace. `\r` is accepted as well so that documents which start
	// with Windows line endings are not rejected.
	mut local_buf := [u8(0)]
	mut ch := next_char(mut reader, mut local_buf)!
	for {
		match ch {
			` `, `\t`, `\r`, `\n` {
				ch = next_char(mut reader, mut local_buf)!
				continue
			}
			`<` {
				break
			}
			else {
				return error('Expecting a prolog or root node starting with "<".')
			}
		}
	}

	ch = next_char(mut reader, mut local_buf)!
	if ch != `?` {
		// No XML declaration: `ch` already belongs to the root node.
		return Prolog{}, ch
	}

	ch = next_char(mut reader, mut local_buf)!
	if ch != `x` {
		return error('Expecting a prolog starting with "<?x".')
	}

	ch = next_char(mut reader, mut local_buf)!
	if ch != `m` {
		return error('Expecting a prolog starting with "<?xm".')
	}

	ch = next_char(mut reader, mut local_buf)!
	if ch != `l` {
		return error('Expecting a prolog starting with "<?xml".')
	}

	mut prolog_buffer := strings.new_builder(xml.default_string_builder_cap)

	// Keep reading character by character until we find the end of the prolog
	mut found_question_mark := false

	for {
		ch = next_char(mut reader, mut local_buf)!
		match ch {
			`?` {
				if found_question_mark {
					return error('Invalid prolog: Two question marks found in a row.')
				}
				found_question_mark = true
			}
			`>` {
				if found_question_mark {
					break
				}
				return error('Invalid prolog: Found ">" before "?".')
			}
			else {
				if found_question_mark {
					// The "?" was not the start of "?>": it is part of the declaration.
					found_question_mark = false
					prolog_buffer.write_u8(`?`)
				}
				prolog_buffer.write_u8(ch)
			}
		}
	}

	prolog_attributes := prolog_buffer.str().trim_space()

	attributes := if prolog_attributes.len == 0 {
		xml.default_prolog_attributes
	} else {
		parse_attributes(prolog_attributes)!
	}

	version := attributes['version'] or { return error('XML declaration missing version.') }
	encoding := attributes['encoding'] or { 'UTF-8' }

	mut comments := []XMLComment{}
	mut doctype := DocumentType{
		name: ''
		dtd: ''
	}
	mut found_doctype := false
	for {
		ch = next_char(mut reader, mut local_buf)!
		match ch {
			` `, `\t`, `\r`, `\n` {
				continue
			}
			`<` {
				// We have a comment, DOCTYPE, or root node
				ch = next_char(mut reader, mut local_buf)!
				match ch {
					`!` {
						// A comment or DOCTYPE
						match next_char(mut reader, mut local_buf)! {
							`-` {
								// A comment
								if next_char(mut reader, mut local_buf)! != `-` {
									return error('Invalid comment.')
								}
								comments << parse_comment(mut reader)!
							}
							`D` {
								if found_doctype {
									return error('Duplicate DOCTYPE declaration.')
								}
								// <!D -> OCTYPE
								mut doc_buf := []u8{len: 6}
								if reader.read(mut doc_buf)! != 6 {
									return error('Invalid DOCTYPE.')
								}
								if doc_buf != xml.doctype_chars {
									return error('Invalid DOCTYPE.')
								}
								found_doctype = true
								doctype = parse_doctype(mut reader)!
							}
							else {
								return error('Unsupported control sequence found in prolog.')
							}
						}
					}
					else {
						// We have found the start of the root node
						break
					}
				}
			}
			// Any other character between prolog items is ignored.
			else {}
		}
	}

	return Prolog{
		version: version
		encoding: encoding
		doctype: doctype
		comments: comments
	}, ch
}
|
||||
|
||||
// parse_children reads the contents of the node `name` from `reader`, starting right
// after its opening tag and stopping once the matching closing tag has been consumed.
// It returns the finished node with the given `attributes` and the collected children:
// nested nodes, comments, CDATA sections and trimmed text.
//
// Text is collected in a buffer that is flushed to the children list when a nested node
// has been parsed and when the closing tag is reached.
fn parse_children(name string, attributes map[string]string, mut reader io.Reader) !XMLNode {
	mut text_buffer := strings.new_builder(xml.default_string_builder_cap)

	mut children := []XMLNodeContents{}
	mut byte_buf := [u8(0)]

	for {
		current := next_char(mut reader, mut byte_buf)!
		if current != `<` {
			// Plain text: keep collecting.
			text_buffer.write_u8(current)
			continue
		}

		marker := next_char(mut reader, mut byte_buf)!
		if marker == `!` {
			// Comment, CDATA
			mut lookahead := [u8(0), 0]
			if reader.read(mut lookahead)! != 2 {
				return error('Invalid XML. Incomplete comment or CDATA declaration.')
			}
			if lookahead == xml.double_dash {
				// Comment
				comment := parse_comment(mut reader)!
				children << comment
			} else if lookahead == xml.c_tag {
				// <![CDATA -> DATA
				mut keyword_rest := []u8{len: 4}
				if reader.read(mut keyword_rest)! != 4 {
					return error('Invalid XML. Incomplete CDATA declaration.')
				}
				if keyword_rest != xml.data_chars {
					return error('Invalid XML. Expected "CDATA" after "<![C".')
				}
				cdata := parse_cdata(mut reader)!
				children << cdata
			} else {
				return error('Invalid XML. Unknown control sequence: ${lookahead.bytestr()}')
			}
		} else if marker == `/` {
			// End of node: the closing tag must be exactly `name` followed by ">".
			expected_len := name.len + 1
			mut closing := []u8{len: expected_len}
			if reader.read(mut closing)! != expected_len {
				return error('Invalid XML. Incomplete node end.')
			}
			if closing.bytestr() != '${name}>' {
				return error('XML node <${name}> not closed.')
			}

			trailing_text := text_buffer.str().trim_space()
			if trailing_text.len > 0 {
				// We have some inner text
				children << trailing_text.replace('\r\n', '\n')
			}
			return XMLNode{
				name: name
				attributes: attributes
				children: children
			}
		} else {
			// Start of child node
			child := parse_single_node(marker, mut reader) or {
				if err.msg() == 'XML node cannot start with "</".' {
					return error('XML node <${name}> not closed.')
				} else {
					return err
				}
			}
			// Text collected so far is stored in front of the child that was just parsed.
			leading_text := text_buffer.str().trim_space()
			if leading_text.len > 0 {
				children << leading_text.replace('\r\n', '\n')
			}
			children << child
		}
	}
	return error('XML node <${name}> not closed.')
}
|
||||
|
||||
// parse_single_node parses one element, including all of its children.
// `first_char` is the character that followed the "<" of the opening tag; the caller has
// already consumed it. The rest of the tag is read from `reader`.
//
// Self-closing tags (`<name ... />`) are returned without children; for any other tag the
// children are read with parse_children until the matching closing tag.
// An error is returned when the tag is a closing tag (`</...`) or has no name.
fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode {
	// We're expecting an opening tag. The check has to look at the character right after
	// "<"; checking a later character would reject one-letter tags such as `<b/>`.
	if first_char == `/` {
		return error('XML node cannot start with "</".')
	}

	mut local_buf := [u8(0)]
	mut contents := strings.new_builder(xml.default_string_builder_cap)
	contents.write_u8(first_char)

	// Collect everything up to the ">" that ends the tag. Reading the tag through this
	// single loop also handles names that are only one character long.
	for {
		ch := next_char(mut reader, mut local_buf)!
		if ch == `>` {
			break
		}
		contents.write_u8(ch)
	}

	tag_contents := contents.str().trim_space()

	// Check if it is a self-closing tag. The trailing "/" is removed before the name is
	// extracted so that `<name/>` does not produce the name "name/".
	is_self_closing := tag_contents.ends_with('/')
	tag_body := if is_self_closing {
		tag_contents[..tag_contents.len - 1].trim_space()
	} else {
		tag_contents
	}

	parts := tag_body.split_any(' \t\r\n')
	if parts.len == 0 || parts[0].len == 0 {
		return error('Invalid XML. Found a tag without a name.')
	}
	name := parts[0]

	attribute_string := tag_body[name.len..].trim_space()
	attributes := parse_attributes(attribute_string)!

	if is_self_closing {
		// We're not looking for children and inner text
		return XMLNode{
			name: name
			attributes: attributes
		}
	}

	return parse_children(name, attributes, mut reader)
}
|
||||
|
||||
// XMLDocument.from_string parses the XML document held in `raw_contents`.
// The bytes of the string are wrapped in an in-memory reader and handed to
// XMLDocument.from_reader.
pub fn XMLDocument.from_string(raw_contents string) !XMLDocument {
	mut buffered := FullBufferReader{
		contents: raw_contents.bytes()
	}
	document := XMLDocument.from_reader(mut buffered)!
	return document
}
|
||||
|
||||
// XMLDocument.from_file parses the XML document stored at `path`. The whole file is read
// into memory before it is parsed. If the file is too large for that, use
// XMLDocument.from_reader with your own reader instead.
pub fn XMLDocument.from_file(path string) !XMLDocument {
	file_bytes := os.read_bytes(path)!
	mut buffered := FullBufferReader{
		contents: file_bytes
	}
	document := XMLDocument.from_reader(mut buffered)!
	return document
}
|
||||
|
||||
// XMLDocument.from_reader parses an XML document from any source that implements the
// io.Reader interface. It is the most generic entry point; the other constructors
// delegate to it.
// Running out of input while the prolog is read is reported as 'XML document is empty.'.
pub fn XMLDocument.from_reader(mut reader io.Reader) !XMLDocument {
	prolog, first_char := parse_prolog(mut reader) or {
		reached_end := err is os.Eof || err is io.Eof || err.msg() == 'Unexpected End Of File.'
		if reached_end {
			return error('XML document is empty.')
		}
		return err
	}

	// `first_char` is the first character of the root tag, already consumed by parse_prolog.
	root_node := parse_single_node(first_char, mut reader)!

	return XMLDocument{
		version: prolog.version
		encoding: prolog.encoding
		comments: prolog.comments
		doctype: prolog.doctype
		root: root_node
	}
}
|
60
vlib/encoding/xml/query.v
Normal file
60
vlib/encoding/xml/query.v
Normal file
|
@ -0,0 +1,60 @@
|
|||
module xml
|
||||
|
||||
// get_element_by_id searches this node and then its descendants, depth first, for a node
// whose `id` attribute equals `id`. It returns the first match, or none.
fn (node XMLNode) get_element_by_id(id string) ?XMLNode {
	// The node itself takes precedence over its descendants.
	if 'id' in node.attributes && node.attributes['id'] == id {
		return node
	}

	// Only element children can carry attributes; text, comments and CDATA are skipped.
	for child in node.children {
		if child is XMLNode {
			if found := child.get_element_by_id(id) {
				return found
			}
		}
	}

	return none
}
|
||||
|
||||
// get_elements_by_tag collects this node (when its name equals `tag`) and all matching
// descendants, in depth-first document order.
fn (node XMLNode) get_elements_by_tag(tag string) []XMLNode {
	mut found := []XMLNode{}

	if node.name == tag {
		found << node
	}

	// Descend into element children; text, comments and CDATA have no tag name.
	for child in node.children {
		match child {
			XMLNode {
				found << child.get_elements_by_tag(tag)
			}
			else {}
		}
	}

	return found
}
|
||||
|
||||
// get_element_by_id returns the first element of the document whose `id` attribute equals
// `id`, or none if there is no such element. The search starts at the root node.
pub fn (doc XMLDocument) get_element_by_id(id string) ?XMLNode {
	root := doc.root
	return root.get_element_by_id(id)
}
|
||||
|
||||
// get_elements_by_tag returns every element of the document whose tag name equals `tag`.
// The search starts at the root node.
pub fn (doc XMLDocument) get_elements_by_tag(tag string) []XMLNode {
	root := doc.root
	return root.get_elements_by_tag(tag)
}
|
30
vlib/encoding/xml/reader_util.v
Normal file
30
vlib/encoding/xml/reader_util.v
Normal file
|
@ -0,0 +1,30 @@
|
|||
module xml
|
||||
|
||||
import io
|
||||
|
||||
// next_char reads from `reader` into `buf` and returns the first byte of `buf`.
// The callers in this file pass a one-byte buffer, so this yields the next byte of input.
// A read that delivers no bytes is reported as 'Unexpected End Of File.'; errors raised by
// the reader itself are propagated unchanged.
fn next_char(mut reader io.Reader, mut buf []u8) !u8 {
	bytes_read := reader.read(mut buf)!
	if bytes_read == 0 {
		return error('Unexpected End Of File.')
	}
	return buf[0]
}
|
||||
|
||||
// FullBufferReader serves reads from a byte array that is held completely in memory.
struct FullBufferReader {
	contents []u8 // the data to read from
mut:
	position int // index of the next byte of `contents` to hand out
}
|
||||
|
||||
// read copies the next bytes of the buffered contents into `buf` and advances the read
// position. It returns the number of bytes copied, which is the smaller of `buf.len` and
// the number of bytes left. Once all contents have been handed out it returns io.Eof.
[direct_array_access]
fn (mut fbr FullBufferReader) read(mut buf []u8) !int {
	remaining := fbr.contents.len - fbr.position
	if remaining <= 0 {
		return io.Eof{}
	}

	// Never copy more than the caller's buffer can hold or than is left to read.
	mut count := remaining
	if buf.len < remaining {
		count = buf.len
	}
	unsafe {
		vmemcpy(&u8(buf.data), &u8(fbr.contents.data) + fbr.position, count)
	}
	fbr.position += count
	return count
}
|
89
vlib/encoding/xml/test/gtk/gtk_test.v
Normal file
89
vlib/encoding/xml/test/gtk/gtk_test.v
Normal file
|
@ -0,0 +1,89 @@
|
|||
module main
|
||||
|
||||
import encoding.xml
|
||||
import os
|
||||
|
||||
// test_large_gtk_file parses the (optional, separately downloaded) Gtk-4.0.gir
// file and checks the GtkWindow class attributes and the full contents of the
// gtk_window_new constructor element. The test is skipped when the file is
// not present next to this test.
fn test_large_gtk_file() ! {
	// Note: If you are contributing to this project, you should download the
	// GIR file from https://raw.githubusercontent.com/gtk-rs/gir-files/master/Gtk-4.0.gir
	// and place it in the same directory as this file.
	gir_path := os.join_path(os.dir(@FILE), 'Gtk-4.0.gir')
	if !os.exists(gir_path) {
		println('Skipping test_large_gtk_file because file does not exist.')
		return
	}

	// The element the gtk_window_new constructor is expected to parse into.
	expected_constructor := xml.XMLNode{
		name: 'constructor'
		attributes: {
			'name':         'new'
			'c:identifier': 'gtk_window_new'
		}
		children: [
			xml.XMLNodeContents(xml.XMLNode{
				name: 'doc'
				attributes: {
					'xml:space': 'preserve'
				}
				children: [
					xml.XMLNodeContents('Creates a new `GtkWindow`.

To get an undecorated window (no window borders), use
[method@Gtk.Window.set_decorated].

All top-level windows created by gtk_window_new() are stored
in an internal top-level window list. This list can be obtained
from [func@Gtk.Window.list_toplevels]. Due to GTK keeping a
reference to the window internally, gtk_window_new() does not
return a reference to the caller.

To delete a `GtkWindow`, call [method@Gtk.Window.destroy].'),
				]
			}),
			xml.XMLNodeContents(xml.XMLNode{
				name: 'return-value'
				attributes: {
					'transfer-ownership': 'none'
				}
				children: [
					xml.XMLNodeContents(xml.XMLNode{
						name: 'doc'
						attributes: {
							'xml:space': 'preserve'
						}
						children: [xml.XMLNodeContents('a new `GtkWindow`.')]
					}),
					xml.XMLNodeContents(xml.XMLNode{
						name: 'type'
						attributes: {
							'name':   'Widget'
							'c:type': 'GtkWidget*'
						}
						children: []
					}),
				]
			}),
		]
	}

	doc := xml.XMLDocument.from_file(gir_path) or {
		return error('Failed to parse large GTK XML file')
	}

	mut found_window := false
	for class_node in doc.get_elements_by_tag('class') {
		if 'c:type' !in class_node.attributes || class_node.attributes['c:type'] != 'GtkWindow' {
			continue
		}
		assert class_node.attributes['parent'] == 'Widget'
		assert class_node.attributes['c:symbol-prefix'] == 'window'
		found_window = true
	}
	assert found_window, 'GtkWindow class not found!'

	mut found_constructor := false
	for ctor_node in doc.get_elements_by_tag('constructor') {
		if 'c:identifier' !in ctor_node.attributes
			|| ctor_node.attributes['c:identifier'] != 'gtk_window_new' {
			continue
		}
		assert ctor_node == expected_constructor
		found_constructor = true
	}
	assert found_constructor, 'gtk_window_new constructor not found!'
}
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<message>
|
||||
<greeting>
|
||||
Hello, World!
|
||||
</greeting>
|
||||
</message>
|
|
@ -0,0 +1,23 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that hello_world.xml parses into a `message` root
// containing a single `greeting` element with the text 'Hello, World!'.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'hello_world.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'message'
			children: [
				xml.XMLNode{
					name: 'greeting'
					children: ['Hello, World!']
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
6
vlib/encoding/xml/test/local/02_note_message/note.xml
Normal file
6
vlib/encoding/xml/test/local/02_note_message/note.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<note>
|
||||
<to>Tove</to>
|
||||
<from>Jani</from>
|
||||
<heading>Reminder</heading>
|
||||
<body>Don't forget me this weekend!</body>
|
||||
</note>
|
41
vlib/encoding/xml/test/local/02_note_message/note_test.v
Normal file
41
vlib/encoding/xml/test/local/02_note_message/note_test.v
Normal file
|
@ -0,0 +1,41 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that note.xml parses into a `note` root with the
// four text-only children `to`, `from`, `heading` and `body`.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'note.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'note'
			children: [
				xml.XMLNode{
					name: 'to'
					children: ['Tove']
				},
				xml.XMLNode{
					name: 'from'
					children: ['Jani']
				},
				xml.XMLNode{
					name: 'heading'
					children: ['Reminder']
				},
				xml.XMLNode{
					name: 'body'
					children: ["Don't forget me this weekend!"]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
34
vlib/encoding/xml/test/local/03_cd_catalogue/cd_catalog.xml
Normal file
34
vlib/encoding/xml/test/local/03_cd_catalogue/cd_catalog.xml
Normal file
|
@ -0,0 +1,34 @@
|
|||
<CATALOG>
|
||||
<CD>
|
||||
<TITLE>Empire Burlesque</TITLE>
|
||||
<ARTIST>Bob Dylan</ARTIST>
|
||||
<COUNTRY>USA</COUNTRY>
|
||||
<COMPANY>Columbia</COMPANY>
|
||||
<PRICE>10.90</PRICE>
|
||||
<YEAR>1985</YEAR>
|
||||
</CD>
|
||||
<CD>
|
||||
<TITLE>Hide your heart</TITLE>
|
||||
<ARTIST>Bonnie Tyler</ARTIST>
|
||||
<COUNTRY>UK</COUNTRY>
|
||||
<COMPANY>CBS Records</COMPANY>
|
||||
<PRICE>9.90</PRICE>
|
||||
<YEAR>1988</YEAR>
|
||||
</CD>
|
||||
<CD>
|
||||
<TITLE>Greatest Hits</TITLE>
|
||||
<ARTIST>Dolly Parton</ARTIST>
|
||||
<COUNTRY>USA</COUNTRY>
|
||||
<COMPANY>RCA</COMPANY>
|
||||
<PRICE>9.90</PRICE>
|
||||
<YEAR>1982</YEAR>
|
||||
</CD>
|
||||
<CD>
|
||||
<TITLE>Still got the blues</TITLE>
|
||||
<ARTIST>Gary Moore</ARTIST>
|
||||
<COUNTRY>UK</COUNTRY>
|
||||
<COMPANY>Virgin records</COMPANY>
|
||||
<PRICE>10.20</PRICE>
|
||||
<YEAR>1990</YEAR>
|
||||
</CD>
|
||||
</CATALOG>
|
181
vlib/encoding/xml/test/local/03_cd_catalogue/cd_test.v
Normal file
181
vlib/encoding/xml/test/local/03_cd_catalogue/cd_test.v
Normal file
|
@ -0,0 +1,181 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that cd_catalog.xml parses into a `CATALOG` root
// holding four `CD` elements, each with TITLE, ARTIST, COUNTRY, COMPANY, PRICE
// and YEAR children in that order.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'cd_catalog.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'CATALOG'
			children: [
				xml.XMLNode{
					name: 'CD'
					children: [
						xml.XMLNode{
							name: 'TITLE'
							children: ['Empire Burlesque']
						},
						xml.XMLNode{
							name: 'ARTIST'
							children: ['Bob Dylan']
						},
						xml.XMLNode{
							name: 'COUNTRY'
							children: ['USA']
						},
						xml.XMLNode{
							name: 'COMPANY'
							children: ['Columbia']
						},
						xml.XMLNode{
							name: 'PRICE'
							children: ['10.90']
						},
						xml.XMLNode{
							name: 'YEAR'
							children: ['1985']
						},
					]
				},
				xml.XMLNode{
					name: 'CD'
					children: [
						xml.XMLNode{
							name: 'TITLE'
							children: ['Hide your heart']
						},
						xml.XMLNode{
							name: 'ARTIST'
							children: ['Bonnie Tyler']
						},
						xml.XMLNode{
							name: 'COUNTRY'
							children: ['UK']
						},
						xml.XMLNode{
							name: 'COMPANY'
							children: ['CBS Records']
						},
						xml.XMLNode{
							name: 'PRICE'
							children: ['9.90']
						},
						xml.XMLNode{
							name: 'YEAR'
							children: ['1988']
						},
					]
				},
				xml.XMLNode{
					name: 'CD'
					children: [
						xml.XMLNode{
							name: 'TITLE'
							children: ['Greatest Hits']
						},
						xml.XMLNode{
							name: 'ARTIST'
							children: ['Dolly Parton']
						},
						xml.XMLNode{
							name: 'COUNTRY'
							children: ['USA']
						},
						xml.XMLNode{
							name: 'COMPANY'
							children: ['RCA']
						},
						xml.XMLNode{
							name: 'PRICE'
							children: ['9.90']
						},
						xml.XMLNode{
							name: 'YEAR'
							children: ['1982']
						},
					]
				},
				xml.XMLNode{
					name: 'CD'
					children: [
						xml.XMLNode{
							name: 'TITLE'
							children: ['Still got the blues']
						},
						xml.XMLNode{
							name: 'ARTIST'
							children: ['Gary Moore']
						},
						xml.XMLNode{
							name: 'COUNTRY'
							children: ['UK']
						},
						xml.XMLNode{
							name: 'COMPANY'
							children: ['Virgin records']
						},
						xml.XMLNode{
							name: 'PRICE'
							children: ['10.20']
						},
						xml.XMLNode{
							name: 'YEAR'
							children: ['1990']
						},
					]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
0
vlib/encoding/xml/test/local/04_empty_file/empty.xml
Normal file
0
vlib/encoding/xml/test/local/04_empty_file/empty.xml
Normal file
|
@ -0,0 +1 @@
|
|||
XML document is empty.
|
1
vlib/encoding/xml/test/local/05_single_element/root.xml
Normal file
1
vlib/encoding/xml/test/local/05_single_element/root.xml
Normal file
|
@ -0,0 +1 @@
|
|||
<sample>Single root element.</sample>
|
18
vlib/encoding/xml/test/local/05_single_element/root_test.v
Normal file
18
vlib/encoding/xml/test/local/05_single_element/root_test.v
Normal file
|
@ -0,0 +1,18 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that root.xml parses into a lone `sample` root
// element whose only child is its text.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'root.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'sample'
			children: ['Single root element.']
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
14
vlib/encoding/xml/test/local/06_nested_elements/nested.xml
Normal file
14
vlib/encoding/xml/test/local/06_nested_elements/nested.xml
Normal file
|
@ -0,0 +1,14 @@
|
|||
<level1>
|
||||
<level2>
|
||||
<level3>
|
||||
<level4>
|
||||
Deeply nested content.
|
||||
</level4>
|
||||
</level3>
|
||||
</level2>
|
||||
<level2>
|
||||
<level3>
|
||||
Less deeply nested content.
|
||||
</level3>
|
||||
</level2>
|
||||
</level1>
|
|
@ -0,0 +1,44 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that nested.xml parses into the expected tree of
// `level1` .. `level4` elements, with text only at the innermost levels.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'nested.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'level1'
			children: [
				xml.XMLNode{
					name: 'level2'
					children: [
						xml.XMLNode{
							name: 'level3'
							children: [
								xml.XMLNode{
									name: 'level4'
									children: ['Deeply nested content.']
								},
							]
						},
					]
				},
				xml.XMLNode{
					name: 'level2'
					children: [
						xml.XMLNode{
							name: 'level3'
							children: ['Less deeply nested content.']
						},
					]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
5
vlib/encoding/xml/test/local/07_mixed_contents/mixed.xml
Normal file
5
vlib/encoding/xml/test/local/07_mixed_contents/mixed.xml
Normal file
|
@ -0,0 +1,5 @@
|
|||
<letter>
|
||||
Dear Mr. <name>John Smith</name>.
|
||||
Your order <orderid>1032</orderid>
|
||||
will be shipped on <shipdate>2001-07-13</shipdate>.
|
||||
</letter>
|
33
vlib/encoding/xml/test/local/07_mixed_contents/mixed_test.v
Normal file
33
vlib/encoding/xml/test/local/07_mixed_contents/mixed_test.v
Normal file
|
@ -0,0 +1,33 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that mixed.xml, whose `letter` root interleaves
// plain text with child elements, parses into the expected alternating list of
// strings and nodes.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'mixed.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'letter'
			children: [
				'Dear Mr.',
				xml.XMLNode{
					name: 'name'
					children: ['John Smith']
				},
				'.\n Your order',
				xml.XMLNode{
					name: 'orderid'
					children: ['1032']
				},
				'will be shipped on',
				xml.XMLNode{
					name: 'shipdate'
					children: ['2001-07-13']
				},
				'.',
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
12
vlib/encoding/xml/test/local/08_comments/comment.xml
Normal file
12
vlib/encoding/xml/test/local/08_comments/comment.xml
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!-- Employee Information-->
|
||||
<address>
|
||||
<!-- Full or first name -->
|
||||
<name>Jones</name>
|
||||
<!-- Registered name of the company -> -->
|
||||
<company>ABSystems</company>
|
||||
<phone>
|
||||
<!-- Phone with country code -) -->
|
||||
(046) 1233-44778
|
||||
</phone>
|
||||
</address>
|
42
vlib/encoding/xml/test/local/08_comments/comment_test.v
Normal file
42
vlib/encoding/xml/test/local/08_comments/comment_test.v
Normal file
|
@ -0,0 +1,42 @@
|
|||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that comment.xml keeps its comments: the one
// before the root ends up in the document's `comments`, the others appear as
// XMLComment children of the elements that contain them.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'comment.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		comments: [
			xml.XMLComment{
				text: ' Employee Information'
			},
		]
		root: xml.XMLNode{
			name: 'address'
			children: [
				xml.XMLComment{
					text: ' Full or first name '
				},
				xml.XMLNode{
					name: 'name'
					children: ['Jones']
				},
				xml.XMLComment{
					text: ' Registered name of the company -> '
				},
				xml.XMLNode{
					name: 'company'
					children: ['ABSystems']
				},
				xml.XMLNode{
					name: 'phone'
					children: [
						xml.XMLComment{
							text: ' Phone with country code -) '
						},
						'(046) 1233-44778',
					]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
|
@ -0,0 +1 @@
|
|||
Malformed XML. Found "<" in attribute string: "<body"
|
1
vlib/encoding/xml/test/local/09_malformed/malformed.xml
Normal file
1
vlib/encoding/xml/test/local/09_malformed/malformed.xml
Normal file
|
@ -0,0 +1 @@
|
|||
<message <body>Sample</body></message>
|
|
@ -0,0 +1 @@
|
|||
XML node <warning> not closed.
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<message>
|
||||
<warning>
|
||||
Hello World
|
||||
<!--missing </warning> -->
|
||||
</message>
|
4
vlib/encoding/xml/test/local/11_cdata_content/cdata.xml
Normal file
4
vlib/encoding/xml/test/local/11_cdata_content/cdata.xml
Normal file
|
@ -0,0 +1,4 @@
|
|||
<sample>
|
||||
<html>This is <b>bold</b></html>
|
||||
<html><![CDATA[This is <b>bold</b>]]></html>
|
||||
</sample>
|
29
vlib/encoding/xml/test/local/11_cdata_content/cdata_test.v
Normal file
29
vlib/encoding/xml/test/local/11_cdata_content/cdata_test.v
Normal file
|
@ -0,0 +1,29 @@
|
|||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that cdata.xml parses so that the first `html`
// element holds plain text and the second holds an XMLCData child.
// The function returns a result (`!`) like the other parsing tests, because it
// propagates the error of `from_file` with `!`.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'cdata.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'sample'
			children: [
				xml.XMLNode{
					name: 'html'
					children: ['This is <b>bold</b>']
				},
				xml.XMLNode{
					name: 'html'
					children: [
						xml.XMLCData{
							text: 'This is <b>bold</b>'
						},
					]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE body [
|
||||
<!ENTITY warning "Warning: Something bad happened... please refresh and try again.">
|
||||
]>
|
||||
<body>
|
||||
<message> &warning; </message>
|
||||
</body>
|
|
@ -0,0 +1,41 @@
|
|||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that entity.xml, after `validate`, has its
// DOCTYPE-declared `warning` entity recorded in the DTD and in the reverse
// entity table, and expanded inside the `message` element.
// The function returns a result (`!`) like the other parsing tests, because it
// propagates the errors of `from_file` and `validate` with `!`.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'entity.xml')
	got := xml.XMLDocument.from_file(xml_path)!.validate()!

	// The reverse table is the default one plus the entity declared in the file.
	mut reverse_lookup := xml.default_entities_reverse.clone()
	reverse_lookup['Warning: Something bad happened... please refresh and try again.'] = 'warning'

	want := xml.XMLDocument{
		parsed_reverse_entities: reverse_lookup
		doctype: xml.DocumentType{
			name: 'body'
			dtd: xml.DocumentTypeDefinition{
				name: ''
				list: [
					xml.DTDEntity{
						name: 'warning'
						value: 'Warning: Something bad happened... please refresh and try again.'
					},
				]
			}
		}
		root: xml.XMLNode{
			name: 'body'
			children: [
				xml.XMLNode{
					name: 'message'
					children: [
						'Warning: Something bad happened... please refresh and try again.',
					]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
|
@ -0,0 +1,71 @@
|
|||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that element.xml, after `validate`, exposes the
// five `<!ELEMENT ...>` declarations of its DOCTYPE and the matching `note`
// tree.
// The function returns a result (`!`) like the other parsing tests, because it
// propagates the errors of `from_file` and `validate` with `!`.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'element.xml')
	got := xml.XMLDocument.from_file(xml_path)!.validate()!

	want := xml.XMLDocument{
		doctype: xml.DocumentType{
			name: 'note'
			dtd: xml.DocumentTypeDefinition{
				name: ''
				list: [
					xml.DTDElement{
						name: 'note'
						definition: ['to', 'from', 'heading', 'body']
					},
					xml.DTDElement{
						name: 'to'
						definition: ['#PCDATA']
					},
					xml.DTDElement{
						name: 'from'
						definition: ['#PCDATA']
					},
					xml.DTDElement{
						name: 'heading'
						definition: ['#PCDATA']
					},
					xml.DTDElement{
						name: 'body'
						definition: ['#PCDATA']
					},
				]
			}
		}
		root: xml.XMLNode{
			name: 'note'
			children: [
				xml.XMLNode{
					name: 'to'
					children: ['Tove']
				},
				xml.XMLNode{
					name: 'from'
					children: ['Jani']
				},
				xml.XMLNode{
					name: 'heading'
					children: ['Reminder']
				},
				xml.XMLNode{
					name: 'body'
					children: ["Don't forget me this weekend!"]
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
14
vlib/encoding/xml/test/local/13_doctype_element/element.xml
Normal file
14
vlib/encoding/xml/test/local/13_doctype_element/element.xml
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE note [
|
||||
<!ELEMENT note (to,from,heading,body)>
|
||||
<!ELEMENT to (#PCDATA)>
|
||||
<!ELEMENT from (#PCDATA)>
|
||||
<!ELEMENT heading (#PCDATA)>
|
||||
<!ELEMENT body (#PCDATA)>
|
||||
]>
|
||||
<note>
|
||||
<to>Tove</to>
|
||||
<from>Jani</from>
|
||||
<heading>Reminder</heading>
|
||||
<body>Don't forget me this weekend!</body>
|
||||
</note>
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<book category="web">
|
||||
<title lang="en" code:type="const char*">Learning XML</title>
|
||||
<author attr=" surrounding spaces ">Erik T. Ray</author>
|
||||
<year>2003</year>
|
||||
<price>39.95</price>
|
||||
</book>
|
45
vlib/encoding/xml/test/local/14_attributes/attributes_test.v
Normal file
45
vlib/encoding/xml/test/local/14_attributes/attributes_test.v
Normal file
|
@ -0,0 +1,45 @@
|
|||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing checks that attributes.xml parses with every attribute
// preserved, including a namespaced name (`code:type`) and a value with
// surrounding spaces.
// The function returns a result (`!`) like the other parsing tests, because it
// propagates the error of `from_file` with `!`.
fn test_valid_parsing() ! {
	xml_path := os.join_path(os.dir(@FILE), 'attributes.xml')
	got := xml.XMLDocument.from_file(xml_path)!

	want := xml.XMLDocument{
		root: xml.XMLNode{
			name: 'book'
			attributes: {
				'category': 'web'
			}
			children: [
				xml.XMLNode{
					name: 'title'
					attributes: {
						'lang':      'en'
						'code:type': 'const char*'
					}
					children: ['Learning XML']
				},
				xml.XMLNode{
					name: 'author'
					attributes: {
						'attr': ' surrounding spaces '
					}
					children: ['Erik T. Ray']
				},
				xml.XMLNode{
					name: 'year'
					children: ['2003']
				},
				xml.XMLNode{
					name: 'price'
					children: ['39.95']
				},
			]
		}
	}

	assert want == got, 'Parsed XML document should be equal to expected XML document'
}
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE body [
|
||||
<!ENTITY>
|
||||
]>
|
||||
<body>
|
||||
</body>
|
|
@ -0,0 +1 @@
|
|||
Entity is missing name.
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE body [
|
||||
<!ENTITY missing>
|
||||
]>
|
||||
<body>
|
||||
&missing;
|
||||
</body>
|
|
@ -0,0 +1 @@
|
|||
Entity is missing value.
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE note [
|
||||
<!ELEMENT>
|
||||
]>
|
||||
<note>
|
||||
</note>
|
|
@ -0,0 +1 @@
|
|||
Element is missing name.
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE note [
|
||||
<!ELEMENT note invalid>
|
||||
]>
|
||||
<note>
|
||||
</note>
|
|
@ -0,0 +1 @@
|
|||
Invalid element definition: invalid
|
25
vlib/encoding/xml/test/spec_test.v
Normal file
25
vlib/encoding/xml/test/spec_test.v
Normal file
|
@ -0,0 +1,25 @@
|
|||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// All the XML files in the spec directory obtained recursively
|
||||
const spec_files = os.walk_ext(os.join_path(os.dir(@FILE), 'local'), 'xml')
|
||||
|
||||
// test_can_parse_all_files parses every XML file collected in `spec_files`.
// A file that fails to parse is accepted only when its directory contains an
// `expected_error.txt` whose (space-trimmed) content equals the parser's
// error message; otherwise the test fails and reports the parser's message.
fn test_can_parse_all_files() ! {
	assert spec_files.len > 0, 'No XML files found in the spec directory'
	for file in spec_files {
		// Only success or failure matters here; the parsed document is discarded.
		_ := xml.XMLDocument.from_file(file) or {
			// Parsing failed. Capture the message now: `err` is rebound inside
			// the nested `or` block below.
			parse_error := err.msg()
			expected_path := os.join_path(os.dir(file), 'expected_error.txt')
			expected_error := os.read_file(expected_path) or {
				// No expected error. Fail the test, keeping the parser's reason.
				return error('Failed to parse XML file: ' + file + ' (' + parse_error + ')')
			}
			// Check if the error message matches the expected error.
			assert parse_error.trim_space() == expected_error.trim_space(), 'Unexpected error message for ' +
				file
			continue
		}
	}
}
|
71
vlib/encoding/xml/types.v
Normal file
71
vlib/encoding/xml/types.v
Normal file
|
@ -0,0 +1,71 @@
|
|||
module xml
|
||||
|
||||
// XMLNodeContents is the sum type of everything an XMLNode can hold as a
// child: CDATA sections, comments, nested nodes, and plain text.
pub type XMLNodeContents = XMLCData | XMLComment | XMLNode | string
|
||||
|
||||
// XMLCData holds the text of a CDATA section.
pub struct XMLCData {
|
||||
pub:
|
||||
text string [required]
|
||||
}
|
||||
|
||||
// XMLComment holds the text of an XML comment.
pub struct XMLComment {
|
||||
pub:
|
||||
text string [required]
|
||||
}
|
||||
|
||||
// XMLNode represents a single XML node. It contains the node name,
|
||||
// a map of attributes, and a list of children. The children can be
|
||||
// other XML nodes, CDATA, plain text, or comments.
|
||||
pub struct XMLNode {
|
||||
pub:
|
||||
// name is the node's tag name.
name string [required]
|
||||
// attributes maps each attribute name to its value.
attributes map[string]string
|
||||
// children holds the node's contents (see XMLNodeContents).
children []XMLNodeContents
|
||||
}
|
||||
|
||||
// XMLDocument is the struct that represents a single XML document.
|
||||
// It contains the prolog and the single root node. The prolog struct
|
||||
// is embedded into the XMLDocument struct, so that the prolog fields
|
||||
// are accessible directly from this struct.
|
||||
// Public prolog fields include version, encoding, comments preceding
|
||||
// the root node, and the document type definition.
|
||||
pub struct XMLDocument {
|
||||
Prolog
|
||||
pub:
|
||||
root XMLNode [required]
|
||||
}
|
||||
|
||||
// DTDListItem is a single declaration inside a document type definition:
// either an element declaration or an entity declaration.
pub type DTDListItem = DTDElement | DTDEntity

// DTDEntity is an `<!ENTITY name "value">` declaration.
// Its fields are public so that code importing this module can read the
// parsed declaration, not only construct it.
pub struct DTDEntity {
pub:
	name  string [required]
	value string [required]
}

// DTDElement is an `<!ELEMENT name (...)>` declaration. `definition` holds the
// entries of the parenthesised list, e.g. `['to', 'from']` or `['#PCDATA']`.
pub struct DTDElement {
pub:
	name       string   [required]
	definition []string [required]
}

// DocumentTypeDefinition is the list of declarations of an inline DTD.
pub struct DocumentTypeDefinition {
pub:
	name string
	list []DTDListItem
}

// DocumentType is the DOCTYPE of a document: its name plus the DTD, which is
// either a parsed DocumentTypeDefinition or a plain string.
pub struct DocumentType {
pub:
	name string [required]
	dtd  DTDInfo
}

// DTDInfo is the type of `DocumentType.dtd`. It is public because it is the
// type of a public field, so importers need to be able to name and match it.
pub type DTDInfo = DocumentTypeDefinition | string
|
||||
|
||||
// Prolog holds the document-level information that precedes the root node.
// It is embedded into XMLDocument.
struct Prolog {
|
||||
// parsed_reverse_entities maps entity values back to entity names; it
// defaults to a copy of `default_entities_reverse`.
parsed_reverse_entities map[string]string = default_entities_reverse.clone()
|
||||
pub:
|
||||
version string = '1.0'
|
||||
encoding string = 'UTF-8'
|
||||
// doctype defaults to an unnamed DOCTYPE whose DTD is the empty string.
doctype DocumentType = DocumentType{
|
||||
name: ''
|
||||
dtd: ''
|
||||
}
|
||||
// comments are the comments that precede the root node.
comments []XMLComment
|
||||
}
|
96
vlib/encoding/xml/validation.v
Normal file
96
vlib/encoding/xml/validation.v
Normal file
|
@ -0,0 +1,96 @@
|
|||
module xml
|
||||
|
||||
// validate returns a copy of `node` in which every text child has been passed
// through `unescape_text` with the given `entities`, and every child element
// has been validated recursively. When `elements` has a definition for this
// node, each child element's name must be listed in that definition, otherwise
// an error is returned. A definition consisting only of '#PCDATA' turns this
// name check off. CDATA and comment children are copied unchanged.
fn (node XMLNode) validate(elements map[string]DTDElement, entities map[string]string) !XMLNode {
	allowed_names := elements[node.name].definition
	is_pcdata_only := allowed_names.len == 1 && allowed_names[0] == '#PCDATA'
	// Child names are only checked for nodes that have a non-#PCDATA definition.
	check_child_names := node.name in elements && !is_pcdata_only

	mut validated := []XMLNodeContents{cap: node.children.len}
	for item in node.children {
		match item {
			XMLNode {
				child_name := item.name
				if check_child_names && child_name !in allowed_names {
					return error('Invalid child element ${child_name} for ${node.name}')
				}
				validated << item.validate(elements, entities)!
			}
			string {
				validated << unescape_text(item, entities: entities)!
			}
			else {
				// Everything else is kept as it is.
				validated << item
			}
		}
	}

	return XMLNode{
		name: node.name
		attributes: node.attributes
		children: validated
	}
}
|
||||
|
||||
// validate checks the document is well-formed and valid. On success it returns
// a new document in which the parsed entities have been expanded; otherwise it
// returns an error.
// When the DTD is only available as a string, no validation is performed yet
// and the document is returned unchanged.
pub fn (doc XMLDocument) validate() !XMLDocument {
	// Being able to parse the document at all already proves it is well-formed.
	dtd := doc.doctype.dtd
	match dtd {
		string {
			// TODO: Validate the document against the DTD string.
			return doc
		}
		DocumentTypeDefinition {
			// Collect the element and entity declarations of the DTD.
			mut element_defs := map[string]DTDElement{}
			mut entity_values := default_entities.clone()
			mut entity_names := default_entities_reverse.clone()

			for decl in dtd.list {
				match decl {
					DTDEntity {
						entity_name := decl.name
						if entity_name in entity_values {
							return error('Duplicate entity definition for ${entity_name}')
						}
						entity_values[entity_name] = decl.value
						entity_names[decl.value] = entity_name
					}
					DTDElement {
						element_name := decl.name
						if element_name in element_defs {
							return error('Duplicate element definition for ${element_name}')
						}
						element_defs[element_name] = decl
					}
				}
			}

			// Validate the tree against the collected declarations.
			validated_root := doc.root.validate(element_defs, entity_values)!

			// A named DOCTYPE must name the root element.
			if doc.doctype.name.len > 0 && doc.doctype.name != validated_root.name {
				return error('Root element ${validated_root.name} does not match DOCTYPE ${doc.doctype.name}')
			}

			return XMLDocument{
				root: validated_root
				parsed_reverse_entities: entity_names
				version: doc.version
				encoding: doc.encoding
				doctype: doc.doctype
				comments: doc.comments
			}
		}
	}
}
|
Loading…
Add table
Add a link
Reference in a new issue