commit 1f678a86216c75b7c0178ee33434f5be488f2642
parent db479b931e80b098e15607ae0b4aac7cb36408e9
Author: Johann150 <johann.galle@protonmail.com>
Date: Thu, 13 Jan 2022 02:05:55 +0100
refactor tests to have meaningful names
Diffstat:
M | src/lib.rs | | | 228 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
1 file changed, 126 insertions(+), 102 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
@@ -206,116 +206,140 @@ pub fn html2text(html: &str) -> String {
#[cfg(test)]
mod tests {
use super::*;
- const cases: &[(&str, &str)] = &[
- ("blah", "blah"),
- // links
- ("<div></div>", ""),
- ("<div>simple text</div>", "simple text"),
- ("click <a href=\"test\">here</a>", "click test"),
- ("click <a class=\"x\" href=\"test\">here</a>", "click test"),
- (
- "click <a href=\"ents/'x'\">here</a>",
- "click ents/'x'",
- ),
- ("click <a href=\"javascript:void(0)\">here</a>", "click "),
- (
- "click <a href=\"test\"><span>here</span> or here</a>",
- "click test",
- ),
- (
- "click <a href=\"http://bit.ly/2n4wXRs\">news</a>",
- "click http://bit.ly/2n4wXRs",
- ),
- ("<a rel=\"mw:WikiLink\" href=\"/wiki/yet#English\" title=\"yet\">yet</a>, <a rel=\"mw:WikiLink\" href=\"/wiki/not_yet#English\" title=\"not yet\">not yet</a>", "/wiki/yet#English, /wiki/not_yet#English"),
+ // Generates named `#[test]` functions that run `html2text` on an input
+ // literal and compare the result against an expected literal.
+ macro_rules! test {
+ // Single case: `test!(name, "input", "expected")` expands to one test
+ // function called `name`.
+ ($name:ident, $from:literal, $to:literal $(,)?) => {
+ #[test]
+ fn $name() {
+ assert_eq!(&html2text($from), $to);
+ }
+ };
+ // List of cases: every `name: "input" to "expected",` entry (the comma
+ // after each entry is required) is forwarded to the single-case arm.
+ ($($name:ident: $from:literal to $to:literal,)* $(,)?) => {
+ $(test!{$name, $from, $to})*
+ };
+ }
+
+ test! {
+ plaintext: "blah" to "blah",
+ tag: "<div></div>" to "",
+ tag_contents: "<div>simple text</div>" to "simple text",
+ // links
+ link:
+ "click <a href=\"test\">here</a>"
+ to "click test",
+ links_ignore_attributes:
+ "click <a class=\"x\" href=\"test\">here</a>"
+ to "click test",
+ link_entities_in_url:
+ "click <a href=\"ents/'x'\">here</a>"
+ to "click ents/'x'",
+ link_javascript:
+ "click <a href=\"javascript:void(0)\">here</a>"
+ to "click ",
+ link_ignore_content_tags:
+ "click <a href=\"test\"><span>here</span> or here</a>"
+ to "click test",
+ link_absolute_url:
+ "click <a href=\"http://bit.ly/2n4wXRs\">news</a>"
+ to "click http://bit.ly/2n4wXRs",
+ link_ignore_attributes_2:
+ "<a rel=\"mw:WikiLink\" href=\"/wiki/yet#English\" title=\"yet\">yet</a>, <a rel=\"mw:WikiLink\" href=\"/wiki/not_yet#English\" title=\"not yet\">not yet</a>"
+ to "/wiki/yet#English, /wiki/not_yet#English",
// inlines
- ("strong <strong>text</strong>", "strong text"),
- ("some <div id=\"a\" class=\"b\">div</div>", "some div"),
+ ignore_inline:
+ "strong <strong>text</strong>"
+ to "strong text",
+ ignore_inline_attributes:
+ "some <div id=\"a\" class=\"b\">div</div>"
+ to "some div",
// line breaks and spaces
- ("should ignore more spaces", "should ignore more spaces"),
- ("should \nignore \r\nnew lines", "should ignore new lines"),
- ("a\nb\nc", "a b c"),
- ("two<br>line<br/>breaks", "two\r\nline\r\nbreaks"),
- ("<p>two</p><p>paragraphs</p>", "two\r\n\r\nparagraphs"),
+ collapse_spaces:
+ "should ignore more spaces" to "should ignore more spaces",
+ collapse_linebreaks:
+ "a\nb\nc" to "a b c",
+ collapse_mixed:
+ "should \nignore \r\nnew lines" to "should ignore new lines",
+ br_tag:
+ "two<br>line<br/>breaks" to "two\r\nline\r\nbreaks",
+ paragraph:
+ "<p>two</p><p>paragraphs</p>" to "two\r\n\r\nparagraphs",
// Headers
- ("<h1>First</h1>main text", "First\r\n\r\nmain text"),
- (
- "First<h2>Second</h2>next section",
- "First\r\n\r\nSecond\r\n\r\nnext section",
- ),
- ("<h2>Second</h2>next section", "Second\r\n\r\nnext section"),
- (
- "Second<h3>Third</h3>next section",
- "Second\r\n\r\nThird\r\n\r\nnext section",
- ),
- ("<h3>Third</h3>next section", "Third\r\n\r\nnext section"),
- (
- "Third<h4>Fourth</h4>next section",
- "Third\r\n\r\nFourth\r\n\r\nnext section",
- ),
- ("<h4>Fourth</h4>next section", "Fourth\r\n\r\nnext section"),
- (
- "Fourth<h5>Fifth</h5>next section",
- "Fourth\r\n\r\nFifth\r\n\r\nnext section",
- ),
- ("<h5>Fifth</h5>next section", "Fifth\r\n\r\nnext section"),
- (
- "Fifth<h6>Sixth</h6>next section",
- "Fifth\r\n\r\nSixth\r\n\r\nnext section",
- ),
- ("<h6>Sixth</h6>next section", "Sixth\r\n\r\nnext section"),
- ("<h7>Not Header</h7>next section", "Not Headernext section"),
+ h1:
+ "<h1>First</h1>main text" to "First\r\n\r\nmain text",
+ h2_inline:
+ "First<h2>Second</h2>next section"
+ to "First\r\n\r\nSecond\r\n\r\nnext section",
+ h2:
+ "<h2>Second</h2>next section" to "Second\r\n\r\nnext section",
+ h3_inline:
+ "Second<h3>Third</h3>next section"
+ to "Second\r\n\r\nThird\r\n\r\nnext section",
+ h3:
+ "<h3>Third</h3>next section" to "Third\r\n\r\nnext section",
+ h4_inline:
+ "Third<h4>Fourth</h4>next section"
+ to "Third\r\n\r\nFourth\r\n\r\nnext section",
+ h4:
+ "<h4>Fourth</h4>next section" to "Fourth\r\n\r\nnext section",
+ h5_inline:
+ "Fourth<h5>Fifth</h5>next section"
+ to "Fourth\r\n\r\nFifth\r\n\r\nnext section",
+ h5:
+ "<h5>Fifth</h5>next section" to "Fifth\r\n\r\nnext section",
+ h6_inline:
+ "Fifth<h6>Sixth</h6>next section"
+ to "Fifth\r\n\r\nSixth\r\n\r\nnext section",
+ h6:
+ "<h6>Sixth</h6>next section" to "Sixth\r\n\r\nnext section",
+ no_h7:
+ "<h7>Not Header</h7>next section" to "Not Headernext section",
// HTML entities
- ("two spaces", "two spaces"),
- ("© 2017 K3A", "© 2017 K3A"),
- ("<printtag>", "<printtag>"),
- (
- "would you pay in ¢, £, ¥ or €?",
- "would you pay in ¢, £, ¥ or €?",
- ),
- (
- "Tom & Jerry is not an entity",
- "Tom & Jerry is not an entity",
- ),
- ("this &neither; as you see", "this &neither; as you see"),
- (
- "list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>",
- "list of items\r\nOne\r\nTwo\r\nThree\r\n",
- ),
- ("fish & chips", "fish & chips"),
- (
- ""I'm sorry, Dave. I'm afraid I can't do that." – HAL, 2001: A Space Odyssey",
- "\"I'm sorry, Dave. I'm afraid I can't do that.\" – HAL, 2001: A Space Odyssey",
- ),
- ("Google ®", "Google ®"),
- (
- "⁌ decimal and hex entities supported ⁍",
- "⁌ decimal and hex entities supported ⁍",
- ),
+ entity_nbsp:
+ "two spaces" to "two spaces",
+ entity_copy:
+ "© 2017 K3A" to "© 2017 K3A",
+ entity_tag:
+ "<printtag>" to "<printtag>",
+ entity_currencies:
+ "would you pay in ¢, £, ¥ or €?"
+ to "would you pay in ¢, £, ¥ or €?",
+ ampersand_not_entity:
+ "Tom & Jerry is not an entity" to "Tom & Jerry is not an entity",
+ entity_unknown:
+ "this &neither; as you see" to "this &neither; as you see",
+ entity_amp:
+ "fish & chips" to "fish & chips",
+ unordered_list:
+ "list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>"
+ to "list of items\r\nOne\r\nTwo\r\nThree\r\n",
+ entity_quot:
+ ""I'm sorry, Dave. I'm afraid I can't do that." – HAL, 2001: A Space Odyssey"
+ to "\"I'm sorry, Dave. I'm afraid I can't do that.\" – HAL, 2001: A Space Odyssey",
+ entity_reg:
+ "Google ®" to "Google ®",
// Large entity
- ("&abcdefghij;", "&abcdefghij;"),
+ entity_large_unknown:
+ "&abcdefghij;" to "&abcdefghij;",
// Numeric HTML entities
- (
- "'single quotes' and 츝",
- "'single quotes' and 츝",
- ),
+ entity_numeric:
+ "⁌ decimal and hex entities supported ⁍"
+ to "⁌ decimal and hex entities supported ⁍",
+ entity_numeric_2:
+ "'single quotes' and 츝"
+ to "'single quotes' and 츝",
// full HTML structure
- ("", ""),
- ("<html><head><title>Good</title></head><body>x</body>", "x"),
- (
- "we are not <script type=\"javascript\"></script>interested in scripts",
- "we are not interested in scripts",
- ),
+ empty: "" to "",
+ full_html:
+ "<html><head><title>Good</title></head><body>x</body>" to "x",
+ ignore_script:
+ "we are not <script type=\"javascript\"></script>interested in scripts"
+ to "we are not interested in scripts",
// custom html tags
- ("<aa>hello</aa>", "hello"),
- ("<aa >hello</aa>", "hello"),
- ("<aa x=\"1\">hello</aa>", "hello"),
- ];
-
- #[test]
- fn test_all() {
- for case in cases {
- assert_eq!(&html2text(case.0), case.1);
- }
+ ignore_unknown_tag:
+ "<aa>hello</aa>" to "hello",
+ ignore_unknown_tag_whitespace:
+ "<aa >hello</aa>" to "hello",
+ ignore_unknown_tag_attributes:
+ "<aa x=\"1\">hello</aa>" to "hello",
}
}