1 package emissary.transform.decode; 2 3 import emissary.test.core.junit5.UnitTest; 4 import emissary.util.CharacterCounterSet; 5 6 import org.junit.jupiter.api.Test; 7 8 import static org.junit.jupiter.api.Assertions.assertEquals; 9 import static org.junit.jupiter.api.Assertions.assertNotNull; 10 import static org.junit.jupiter.api.Assertions.assertNull; 11 12 @Deprecated 13 class HtmlEscapeTest extends UnitTest { 14 15 private static final String W = "Президент Буш"; 16 17 @Test 18 void testEntity() { 19 String[] t = { 20 "<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test Space", "Copy©Right", 21 W + "»<font color=\"navy\">"}; 22 23 String[] ans = { 24 "<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "Test Space", "Copy\u00A9Right", W + "\u00BB<font color=\"navy\">"}; 25 26 for (int i = 0; i < t.length; i++) { 27 String s = HtmlEscape.unescapeEntities(t[i]); 28 assertEquals(ans[i], s, "Entities must be escaped in string '" + t[i] + "'"); 29 byte[] b = HtmlEscape.unescapeEntities(t[i].getBytes()); 30 assertEquals(ans[i], new String(b), "Entity bytes must be escaped in '" + t[i] + "'"); 31 } 32 } 33 34 @Test 35 void testBrokenEntity() { 36 String[] t = {"Test Space", "Test&;nbsp;Space", "Test  Space", W + "&;raquo;<font color=\"navy\">"}; 37 38 String[] ans = {"Test Space", "Test Space", "Test Space", W + "\u00BB<font color=\"navy\">"}; 39 40 for (int i = 0; i < t.length; i++) { 41 String s = HtmlEscape.unescapeEntities(t[i]); 42 assertEquals(ans[i], s, "Entities must be escaped in string '" + t[i] + "'"); 43 byte[] b = HtmlEscape.unescapeEntities(t[i].getBytes()); 44 assertEquals(ans[i], new String(b), "Entity bytes must be escaped in '" + t[i] + "'"); 45 } 46 } 47 48 @Test 49 void testEntityRemovalInString() { 50 String t = "anti­dis­estab­lish­ment­ary"; 51 String s = "antidisestablishmentary"; 52 assertEquals(s, HtmlEscape.unescapeEntities(t), "Entities should have been removed in string"); 53 } 54 55 @Test 56 void testEntityRemovalInBytes() { 57 String t = "anti­dis­estab­lish­ment­ary"; 58 String s = "antidisestablishmentary"; 59 assertEquals(s, new String(HtmlEscape.unescapeEntities(t.getBytes())), "Entities should have been removed in bytes"); 60 } 61 62 @Test 63 void testEscapingBeyondBMPInString() { 64 String t = "Test 𝒥 Script J"; 65 StringBuilder sb = new StringBuilder(); 66 sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J"); 67 assertEquals(sb.toString(), HtmlEscape.unescapeHtml(t), "Hex char beyond BMP must be escaped in String"); 68 } 69 70 @Test 71 void testEscapingBeyondBMPInBytes() { 72 String t = "Test 𝒥 Script J"; 73 StringBuilder sb = new StringBuilder(); 74 sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J"); 75 assertEquals(sb.toString(), new String(HtmlEscape.unescapeHtml(t.getBytes())), "Hex char beyond BMP must be escaped in bytes"); 76 } 77 78 @Test 79 void testEscapeEntityBeyondBMPInString() { 80 String t = "Test 𝒥 Script J"; 81 StringBuilder sb = new StringBuilder(); 82 sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J"); 83 assertEquals(sb.toString(), HtmlEscape.unescapeEntities(t), "Entity beyond BMP must be escaped in String"); 84 } 85 86 @Test 87 void testEscapeEntityBeyondBMPInBytes() { 88 String t = "Test 𝒥 Script J"; 89 StringBuilder sb = new StringBuilder(); 90 sb.append("Test ").appendCodePoint(0x1D4A5).append(" Script J"); 91 assertEquals(sb.toString(), new String(HtmlEscape.unescapeEntities(t.getBytes())), "Entity beyond BMP must be escaped in bytes"); 92 } 93 94 @Test 95 void testEscape() { 96 String[] t = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy©Right"}; 97 98 String[] ans = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy\u00A9Right"}; 99 100 for (int i = 0; i < t.length; i++) { 101 String s = HtmlEscape.unescapeHtml(t[i]); 102 assertEquals(ans[i], s, "Characters must be escaped in " + t[i]); 103 byte[] b = HtmlEscape.unescapeHtml(t[i].getBytes()); 104 assertEquals(ans[i], new String(b), "Character bytes must be escaped in " + t[i]); 105 } 106 } 107 108 @Test 109 void testHexEscapeWithoutLeadingZero() { 110 String t = "Президент Буш"; 111 String s = W; 112 assertEquals(s, HtmlEscape.unescapeHtml(t), "Hex characters must be escaped"); 113 assertEquals(s, new String(HtmlEscape.unescapeHtml(t.getBytes())), "Hex characters must be escaped"); 114 } 115 116 @Test 117 void testHexEscape() { 118 String[] t = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy©Right"}; 119 120 String[] ans = {"<HTML><HEAD></HEAD><BODY>Help me. No change.</BODY></HTML>", "TestaSpace", "Copy\u00A9Right"}; 121 122 for (int i = 0; i < t.length; i++) { 123 String s = HtmlEscape.unescapeHtml(t[i]); 124 assertEquals(ans[i], s, "Hex Characters must be escaped in " + t[i]); 125 byte[] b = HtmlEscape.unescapeHtml(t[i].getBytes()); 126 assertEquals(ans[i], new String(b), "Hex Character bytes must be escaped in " + t[i]); 127 } 128 } 129 130 @Test 131 void testNullInput() { 132 assertEquals("", HtmlEscape.unescapeHtml((String) null), "Null cannot be returned for null input"); 133 } 134 135 @Test 136 void testEmptyInput() { 137 assertEquals("", HtmlEscape.unescapeHtml(""), "Null cannot be returned for null input"); 138 } 139 140 @Test 141 void testNullByteInput() { 142 assertNotNull(HtmlEscape.unescapeHtml((byte[]) null), "Null cannot be returned for null input"); 143 assertEquals(0, HtmlEscape.unescapeHtml((byte[]) null).length, "Empty array returned for null input"); 144 } 145 146 @Test 147 void testEmptyByteInput() { 148 assertEquals(0, HtmlEscape.unescapeHtml(new byte[0]).length, "Empty array returned for 0 length input"); 149 } 150 151 @Test 152 void testHexInputAsString() { 153 assertNull(HtmlEscape.unescapeHtmlChar("ffff", false), "Unescape non-hex input"); 154 } 155 156 @Test 157 void testNonHexInputAsHex() { 158 assertNull(HtmlEscape.unescapeHtmlChar("gggg", true), "Unescape non-hex input"); 159 } 160 161 @Test 162 void testNonterminatedEntityMarkerInByteArray() { 163 String s = "alors le ré"; 164 String t = "alors le ré"; 165 assertEquals(t, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case"); 166 } 167 168 @Test 169 void testNonterminatedEntityMarkerInString() { 170 String s = "alors le ré"; 171 String t = "alors le ré"; 172 assertEquals(t, HtmlEscape.unescapeEntities(s), "Non terminating entity case"); 173 } 174 175 @Test 176 void testNonterminatedEntityMarkerWithSpaceInByteArray() { 177 String s = "&;foobarb "; 178 assertEquals(s, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case"); 179 } 180 181 @Test 182 void testNonterminatedEntityMarkerWithSpaceInString() { 183 String s = "&;foobarb "; 184 assertEquals(s, HtmlEscape.unescapeEntities(s), "Non terminating entity case"); 185 } 186 187 @Test 188 void testNonterminatedEntityMarkerWithExtraSemicolonInByteArray() { 189 String s = "&;foobarb"; 190 assertEquals(s, new String(HtmlEscape.unescapeEntities(s.getBytes())), "Non terminating entity case"); 191 } 192 193 @Test 194 void testNonterminatedEntityMarkerWithExtraSemicolonInString() { 195 String s = "&;foobarb"; 196 assertEquals(s, HtmlEscape.unescapeEntities(s), "Non terminating entity case"); 197 } 198 199 @Test 200 void testMissingSemicolonInString() { 201 assertEquals("a b", HtmlEscape.unescapeEntities("a  b"), "Missing semi-colon must be handled"); 202 } 203 204 @Test 205 void testMissingSemicolonInByteArray() { 206 assertEquals("a b", new String(HtmlEscape.unescapeEntities("a  b".getBytes())), "Missing semi-colon must be handled"); 207 } 208 209 @Test 210 void testExtraSemicolonInString() { 211 assertEquals("a b", HtmlEscape.unescapeEntities("a&;nbsp;b"), "Extra semi-colon must be handled"); 212 } 213 214 @Test 215 void testExtraSemicolonInByteArray() { 216 assertEquals("a b", new String(HtmlEscape.unescapeEntities("a&;nbsp;b".getBytes())), "Extra semi-colon must be handled"); 217 } 218 219 @Test 220 void testHandlingOf160AndNbspAreIdentical() { 221 assertEquals("a b", new String(HtmlEscape.unescapeEntities("a  b".getBytes())), "Entity 160 is an nbsp"); 222 } 223 224 @Test 225 void testCountingOfBlankSpaceEscapes() { 226 CharacterCounterSet c = new CharacterCounterSet(); 227 HtmlEscape.unescapeEntities("a b ", c); 228 assertEquals(2, c.getBlankSpaceCount(), "Counted nbsp as blank space"); 229 } 230 231 @Test 232 void testCountingOfBlankSpaceEscapesAsBytes() { 233 CharacterCounterSet c = new CharacterCounterSet(); 234 HtmlEscape.unescapeEntities("a b ".getBytes(), c); 235 assertEquals(2, c.getBlankSpaceCount(), "Counted nbsp as blank space"); 236 } 237 238 @Test 239 void testCountingEncodedLetters() { 240 CharacterCounterSet c = new CharacterCounterSet(); 241 String s = "alors le ré"; 242 HtmlEscape.unescapeEntities(s, c); 243 assertEquals(1, c.getLetterCount(), "Counted eacute as letter"); 244 } 245 246 @Test 247 void testCountingEncodedLettersAsBytes() { 248 CharacterCounterSet c = new CharacterCounterSet(); 249 String s = "alors le ré"; 250 HtmlEscape.unescapeEntities(s.getBytes(), c); 251 assertEquals(1, c.getLetterCount(), "Counted eacute as letter"); 252 } 253 254 @Test 255 void testTwoDigitNumericString() { 256 assertEquals(",", HtmlEscape.unescapeHtml(","), "Short numeric encoded string"); 257 } 258 259 @Test 260 void testTwoDigitNumericByteArray() { 261 assertEquals(",", new String(HtmlEscape.unescapeHtml(",".getBytes())), "Short numeric encoded byte array"); 262 } 263 264 @Test 265 void testTwoDigitNumericHexString() { 266 assertEquals(",", HtmlEscape.unescapeHtml(","), "Short numeric encoded hex string"); 267 } 268 269 @Test 270 void testTwoDigitNumericHexByteArray() { 271 assertEquals(",", new String(HtmlEscape.unescapeHtml(",".getBytes())), "Short numeric encoded hex byte array"); 272 } 273 }