encoding.html: implement unescape() (#19267)

This commit is contained in:
Turiiya 2023-09-05 07:29:24 +02:00 committed by GitHub
parent c126450201
commit 273341685a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 2258 additions and 5 deletions

View file

@@ -20,3 +20,51 @@ fn test_escape_html() {
assert html.escape('café') == 'café'
assert html.escape('<p>façade</p>') == '&lt;p&gt;façade&lt;/p&gt;'
}
// test_unescape_html exercises `html.unescape` called without any options.
// Every row below is `[input, expected]`; the loop at the end asserts that
// unescaping `input` yields exactly `expected`.
fn test_unescape_html() {
	// Test different formats
	format_cases := [
		['&#39;&#x27;&apos;', "'&#x27;&apos;"],
	]
	// Converse escape tests
	converse_cases := [
		['&lt;&gt;&amp;', '<>&'],
		['No change', 'No change'],
		['&lt;b&gt;Bold text&lt;/b&gt;', '<b>Bold text</b>'],
		['&lt;img /&gt;', '<img />'],
		['&#39; onmouseover=&#39;alert(1)&#39;', "' onmouseover='alert(1)'"],
		['&lt;a href=&#39;http://www.example.com&#39;&gt;link&lt;/a&gt;', "<a href='http://www.example.com'>link</a>"],
		['&lt;script&gt;alert(&#39;hello&#39;);&lt;/script&gt;', "<script>alert('hello');</script>"],
	]
	// Cases obtained from:
	// https://github.com/apache/commons-lang/blob/master/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java
	commons_lang_cases := [
		['plain text', 'plain text'],
		['', ''],
		['bread &amp; butter', 'bread & butter'],
		['&#34;bread&#34; &amp; butter', '"bread" & butter'],
		['greater than &gt;', 'greater than >'],
		['&lt; less than', '< less than'],
	]
	// Leave accents as-is
	accent_cases := [
		['café', 'café'],
		['&lt;p&gt;façade&lt;/p&gt;', '<p>façade</p>'],
	]
	// Walk the groups in the same order the checks were originally listed.
	for group in [format_cases, converse_cases, commons_lang_cases, accent_cases] {
		for pair in group {
			assert html.unescape(pair[0]) == pair[1]
		}
	}
}
// test_unescape_all_html exercises `html.unescape` with the `all: true` option.
// Every row below is `[input, expected]`; the loop at the end asserts that
// unescaping `input` with `all: true` yields exactly `expected`.
fn test_unescape_all_html() {
	// Test different formats
	format_cases := [
		['&#39;&#x27;&apos;', "'''"],
	]
	// Converse escape tests
	converse_cases := [
		['&lt;&gt;&amp;', '<>&'],
		['No change', 'No change'],
		['&lt;b&gt;Bold text&lt;/b&gt;', '<b>Bold text</b>'],
		['&lt;img /&gt;', '<img />'],
		['&#39; onmouseover=&#39;alert(1)&#39;', "' onmouseover='alert(1)'"],
		['&lt;a href=&#39;http://www.example.com&#39;&gt;link&lt;/a&gt;', "<a href='http://www.example.com'>link</a>"],
		['&lt;script&gt;alert(&#39;hello&#39;);&lt;/script&gt;', "<script>alert('hello');</script>"],
	]
	// Cases obtained from:
	// https://github.com/apache/commons-lang/blob/master/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java
	commons_lang_cases := [
		['plain text', 'plain text'],
		['', ''],
		['bread &amp; butter', 'bread & butter'],
		['&#34;bread&#34; &amp; butter', '"bread" & butter'],
		['greater than &gt;', 'greater than >'],
		['&lt; less than', '< less than'],
	]
	// Leave accents as-is
	accent_cases := [
		['café', 'café'],
		['&lt;p&gt;façade&lt;/p&gt;', '<p>façade</p>'],
	]
	// Walk the groups in the same order the checks were originally listed.
	for group in [format_cases, converse_cases, commons_lang_cases, accent_cases] {
		for pair in group {
			assert html.unescape(pair[0], all: true) == pair[1]
		}
	}
}