View Javadoc
1   package emissary.transform.decode;
2   
3   import emissary.test.core.junit5.UnitTest;
4   import emissary.util.CharacterCounterSet;
5   
6   import org.junit.jupiter.api.Test;
7   
8   import static org.junit.jupiter.api.Assertions.assertEquals;
9   import static org.junit.jupiter.api.Assertions.assertNotNull;
10  import static org.junit.jupiter.api.Assertions.assertNull;
11  
12  @Deprecated
13  class HtmlEscapeTest extends UnitTest {
14  
15      private static final String W = "Президент Буш";
16  
17      @Test
18      void testEntity() {
19          String[] t = {
20                  "<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test&nbsp;Space", "Copy&copy;Right",
21                  W + "&raquo;<font  color=\"navy\">"};
22  
23          String[] ans = {
24                  "<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test Space", "Copy\u00A9Right", W + "\u00BB<font  color=\"navy\">"};
25  
26          for (int i = 0; i < t.length; i++) {
27              String s = HtmlEscape.unescapeEntities(t[i]);
28              assertEquals(ans[i], s, "Entities must be escaped in string '" + t[i] + "'");
29              byte[] b = HtmlEscape.unescapeEntities(t[i].getBytes());
30              assertEquals(ans[i], new String(b), "Entity bytes must be escaped in '" + t[i] + "'");
31          }
32      }
33  
34      @Test
35      void testBrokenEntity() {
36          String[] t = {"Test&nbsp;Space", "Test&;nbsp;Space", "Test&nbsp Space", W + "&;raquo;<font  color=\"navy\">"};
37  
38          String[] ans = {"Test Space", "Test Space", "Test  Space", W + "\u00BB<font  color=\"navy\">"};
39  
40          for (int i = 0; i < t.length; i++) {
41              String s = HtmlEscape.unescapeEntities(t[i]);
42              assertEquals(ans[i], s, "Entities must be escaped in string '" + t[i] + "'");
43              byte[] b = HtmlEscape.unescapeEntities(t[i].getBytes());
44              assertEquals(ans[i], new String(b), "Entity bytes must be escaped in '" + t[i] + "'");
45          }
46      }
47  
48      @Test
49      void testEntityRemovalInString() {
50          String t = "anti&shy;dis&shy;estab&shy;lish&shy;ment&shy;ary";
51          String s = "antidisestablishmentary";
52          assertEquals(s, HtmlEscape.unescapeEntities(t), "Entities should have been removed in string");
53      }
54  
55      @Test
56      void testEntityRemovalInBytes() {
57          String t = "anti&shy;dis&shy;estab&shy;lish&shy;ment&shy;ary";
58          String s = "antidisestablishmentary";
59          assertEquals(s, new String(HtmlEscape.unescapeEntities(t.getBytes())), "Entities should have been removed in bytes");
60      }
61  
62      @Test
63      void testEscapingBeyondBMPInString() {
64          String t = "Test &#x1D4A5; Script J";
65          StringBuilder sb = new StringBuilder();
66          sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J");
67          assertEquals(sb.toString(), HtmlEscape.unescapeHtml(t), "Hex char beyond BMP must be escaped in String");
68      }
69  
70      @Test
71      void testEscapingBeyondBMPInBytes() {
72          String t = "Test &#x1D4A5; Script J";
73          StringBuilder sb = new StringBuilder();
74          sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J");
75          assertEquals(sb.toString(), new String(HtmlEscape.unescapeHtml(t.getBytes())), "Hex char beyond BMP must be escaped in bytes");
76      }
77  
78      @Test
79      void testEscapeEntityBeyondBMPInString() {
80          String t = "Test &Jscr; Script J";
81          StringBuilder sb = new StringBuilder();
82          sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J");
83          assertEquals(sb.toString(), HtmlEscape.unescapeEntities(t), "Entity beyond BMP must be escaped in String");
84      }
85  
86      @Test
87      void testEscapeEntityBeyondBMPInBytes() {
88          String t = "Test &Jscr; Script J";
89          StringBuilder sb = new StringBuilder();
90          sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J");
91          assertEquals(sb.toString(), new String(HtmlEscape.unescapeEntities(t.getBytes())), "Entity beyond BMP must be escaped in bytes");
92      }
93  
94      @Test
95      void testEscape() {
96          String[] t = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test&#0097;Space", "Copy&#0169;Right"};
97  
98          String[] ans = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy\u00A9Right"};
99  
100         for (int i = 0; i < t.length; i++) {
101             String s = HtmlEscape.unescapeHtml(t[i]);
102             assertEquals(ans[i], s, "Characters must be escaped in " + t[i]);
103             byte[] b = HtmlEscape.unescapeHtml(t[i].getBytes());
104             assertEquals(ans[i], new String(b), "Character bytes must be escaped in " + t[i]);
105         }
106     }
107 
108     @Test
109     void testHexEscapeWithoutLeadingZero() {
110         String t = "&#x41F;&#x440;&#x435;&#x437;&#x438;&#x434;&#x435;&#x43D;&#x442; &#x411;&#x443;&#x448;";
111         String s = W;
112         assertEquals(s, HtmlEscape.unescapeHtml(t), "Hex characters must be escaped");
113         assertEquals(s, new String(HtmlEscape.unescapeHtml(t.getBytes())), "Hex characters must be escaped");
114     }
115 
116     @Test
117     void testHexEscape() {
118         String[] t = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test&#x0061;Space", "Copy&#x00a9;Right"};
119 
120         String[] ans = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy\u00A9Right"};
121 
122         for (int i = 0; i < t.length; i++) {
123             String s = HtmlEscape.unescapeHtml(t[i]);
124             assertEquals(ans[i], s, "Hex Characters must be escaped in " + t[i]);
125             byte[] b = HtmlEscape.unescapeHtml(t[i].getBytes());
126             assertEquals(ans[i], new String(b), "Hex Character bytes must be escaped in " + t[i]);
127         }
128     }
129 
130     @Test
131     void testNullInput() {
132         assertEquals("", HtmlEscape.unescapeHtml((String) null), "Null cannot be returned for null input");
133     }
134 
135     @Test
136     void testEmptyInput() {
137         assertEquals("", HtmlEscape.unescapeHtml(""), "Null cannot be returned for null input");
138     }
139 
140     @Test
141     void testNullByteInput() {
142         assertNotNull(HtmlEscape.unescapeHtml((byte[]) null), "Null cannot be returned for null input");
143         assertEquals(0, HtmlEscape.unescapeHtml((byte[]) null).length, "Empty array returned for null input");
144     }
145 
146     @Test
147     void testEmptyByteInput() {
148         assertEquals(0, HtmlEscape.unescapeHtml(new byte[0]).length, "Empty array returned for 0 length input");
149     }
150 
151     @Test
152     void testHexInputAsString() {
153         assertNull(HtmlEscape.unescapeHtmlChar("ffff", false), "Unescape non-hex input");
154     }
155 
156     @Test
157     void testNonHexInputAsHex() {
158         assertNull(HtmlEscape.unescapeHtmlChar("gggg", true), "Unescape non-hex input");
159     }
160 
161     @Test
162     void testNonterminatedEntityMarkerInByteArray() {
163         String s = "alors le r&eacute";
164         String t = "alors le ré";
165         assertEquals(t, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case");
166     }
167 
168     @Test
169     void testNonterminatedEntityMarkerInString() {
170         String s = "alors le r&eacute";
171         String t = "alors le ré";
172         assertEquals(t, HtmlEscape.unescapeEntities(s), "Non terminating entity case");
173     }
174 
175     @Test
176     void testNonterminatedEntityMarkerWithSpaceInByteArray() {
177         String s = "&;foobarb ";
178         assertEquals(s, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case");
179     }
180 
181     @Test
182     void testNonterminatedEntityMarkerWithSpaceInString() {
183         String s = "&;foobarb ";
184         assertEquals(s, HtmlEscape.unescapeEntities(s), "Non terminating entity case");
185     }
186 
187     @Test
188     void testNonterminatedEntityMarkerWithExtraSemicolonInByteArray() {
189         String s = "&;foobarb";
190         assertEquals(s, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case");
191     }
192 
193     @Test
194     void testNonterminatedEntityMarkerWithExtraSemicolonInString() {
195         String s = "&;foobarb";
196         assertEquals(s, HtmlEscape.unescapeEntities(s), "Non terminating entity case");
197     }
198 
199     @Test
200     void testMissingSemicolonInString() {
201         assertEquals("a  b", HtmlEscape.unescapeEntities("a&nbsp b"), "Missing semi-colon must be handled");
202     }
203 
204     @Test
205     void testMissingSemicolonInByteArray() {
206         assertEquals("a  b", new String(HtmlEscape.unescapeEntities("a&nbsp b".getBytes())), "Missing semi-colon must be handled");
207     }
208 
209     @Test
210     void testExtraSemicolonInString() {
211         assertEquals("a b", HtmlEscape.unescapeEntities("a&;nbsp;b"), "Extra semi-colon must be handled");
212     }
213 
214     @Test
215     void testExtraSemicolonInByteArray() {
216         assertEquals("a b", new String(HtmlEscape.unescapeEntities("a&;nbsp;b".getBytes())), "Extra semi-colon must be handled");
217     }
218 
219     @Test
220     void testHandlingOf160AndNbspAreIdentical() {
221         assertEquals("a  b", new String(HtmlEscape.unescapeEntities("a&#160;&nbsp;b".getBytes())), "Entity 160 is an nbsp");
222     }
223 
224     @Test
225     void testCountingOfBlankSpaceEscapes() {
226         CharacterCounterSet c = new CharacterCounterSet();
227         HtmlEscape.unescapeEntities("a&nbsp;b&#160;", c);
228         assertEquals(2, c.getBlankSpaceCount(), "Counted nbsp as blank space");
229     }
230 
231     @Test
232     void testCountingOfBlankSpaceEscapesAsBytes() {
233         CharacterCounterSet c = new CharacterCounterSet();
234         HtmlEscape.unescapeEntities("a&nbsp;b&#160;".getBytes(), c);
235         assertEquals(2, c.getBlankSpaceCount(), "Counted nbsp as blank space");
236     }
237 
238     @Test
239     void testCountingEncodedLetters() {
240         CharacterCounterSet c = new CharacterCounterSet();
241         String s = "alors le r&eacute;";
242         HtmlEscape.unescapeEntities(s, c);
243         assertEquals(1, c.getLetterCount(), "Counted eacute as letter");
244     }
245 
246     @Test
247     void testCountingEncodedLettersAsBytes() {
248         CharacterCounterSet c = new CharacterCounterSet();
249         String s = "alors le r&eacute;";
250         HtmlEscape.unescapeEntities(s.getBytes(), c);
251         assertEquals(1, c.getLetterCount(), "Counted eacute as letter");
252     }
253 
254     @Test
255     void testTwoDigitNumericString() {
256         assertEquals(",", HtmlEscape.unescapeHtml("&#44;"), "Short numeric encoded string");
257     }
258 
259     @Test
260     void testTwoDigitNumericByteArray() {
261         assertEquals(",", new String(HtmlEscape.unescapeHtml("&#44;".getBytes())), "Short numeric encoded byte array");
262     }
263 
264     @Test
265     void testTwoDigitNumericHexString() {
266         assertEquals(",", HtmlEscape.unescapeHtml("&#x2c;"), "Short numeric encoded hex string");
267     }
268 
269     @Test
270     void testTwoDigitNumericHexByteArray() {
271         assertEquals(",", new String(HtmlEscape.unescapeHtml("&#x2c;".getBytes())), "Short numeric encoded hex byte array");
272     }
273 }