View Javadoc
1   package emissary.util;
2   
3   import jakarta.annotation.Nullable;
4   
5   import java.io.ByteArrayInputStream;
6   import java.io.IOException;
7   import java.io.InputStream;
8   import java.io.InputStreamReader;
9   import java.io.Reader;
10  import java.nio.charset.StandardCharsets;
11  import java.security.MessageDigest;
12  import java.security.NoSuchAlgorithmException;
13  import java.util.ArrayList;
14  import java.util.List;
15  
16  /**
17   * Common place for the logic to glue byte arrays back together. This is error-prone and shouldn't be thought about any
18   * more than necessary
19   */
20  public class ByteUtil {
21      public static final byte ASCII_0 = '0';
22      public static final byte ASCII_9 = '9';
23      public static final byte ASCII_A_LC = 'a';
24      public static final byte ASCII_B_LC = 'b';
25      public static final byte ASCII_F_LC = 'f';
26      public static final byte ASCII_Z_LC = 'z';
27      public static final byte ASCII_A_UC = 'A';
28      public static final byte ASCII_F_UC = 'F';
29      public static final byte ASCII_Z_UC = 'Z';
30      public static final byte ASCII_SLASH = '/';
31      public static final byte ASCII_ESC = 0x1b;
32      public static final byte ASCII_SP = 0x20;
33      public static final byte ASCII_DEL = 0x7f;
34      public static final String HEX = "0123456789abcdefABCDEF";
35  
36      /**
37       * Check if byte is hexadecimal
38       *
39       * @param b a byte
40       * @return true if b is a hexadecimal
41       */
42      public static boolean isHexadecimal(byte b) {
43          return (b >= ASCII_A_UC && b <= ASCII_F_UC) || (b >= ASCII_A_LC && b <= ASCII_F_LC) || isDigit(b);
44      }
45  
46      /**
47       * Check if all bytes in array are hexadecimal
48       *
49       * @param array a byte array
50       * @return true if all bytes in array are hexadecimal
51       */
52      public static boolean isHexadecimal(byte[] array) {
53          for (byte b : array) {
54              if (!isHexadecimal(b)) {
55                  return false;
56              }
57          }
58          return true;
59      }
60  
61      /**
62       * Check if character is hexadecimal
63       *
64       * @param c a char
65       * @return true if c is a hexadecimal
66       */
67      public static boolean isHexadecimal(char c) {
68          return HEX.indexOf(c) > -1;
69      }
70  
71      /**
72       * Check if all bytes are alphabetical
73       *
74       * @param array a byte array
75       * @return true if all bytes in array are alpha
76       */
77      public static boolean isAlpha(byte[] array) {
78          for (byte b : array) {
79              if (!isAlpha(b)) {
80                  return false;
81              }
82          }
83          return true;
84      }
85  
86      /**
87       * Check if byte is alphabetical
88       *
89       * @param b a byte
90       * @return true if b is alphabetical
91       */
92      public static boolean isAlpha(byte b) {
93          return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z');
94      }
95  
96      /**
97       * Check if byte is alphanumeric
98       *
99       * @param b a byte
100      * @return true if b is alphanumeric
101      */
102     public static boolean isAlNum(byte b) {
103         return isAlpha(b) || isDigit(b);
104     }
105 
106     /**
107      * Check if byte is a digit
108      *
109      * @param b a byte
110      * @return true if b is a digit
111      */
112     public static boolean isDigit(byte b) {
113         // check ascii value of b for digit-ness
114         return b >= '0' && b <= '9';
115     }
116 
117     /**
118      * Check if all bytes are digits
119      *
120      * @param array a byte array
121      * @return true if all bytes in array are digits
122      */
123     public static boolean isDigit(byte[] array) {
124         for (byte b : array) {
125             if (!isDigit(b)) {
126                 return false;
127             }
128         }
129         return true;
130     }
131 
132     /**
133      * Check if byte at position in array is a control or blank space byte
134      *
135      * @param b a byte array
136      * @param pos a position in the byte array
137      * @return true if byte at pos in array b is a control or blank space byte
138      */
139     public static boolean isControlOrBlankSpace(byte[] b, int pos) {
140         if (b[pos] == ASCII_DEL || b[pos] <= ASCII_SP) {
141             return true;
142         }
143         if (b[pos] == ASCII_B_LC && pos > 0 && b[pos - 1] == ASCII_ESC) {
144             return true;
145         }
146 
147         // Check if the current pos is the first byte in a UTF-8 C1
148         // control character (U+0080..U+009f).
149         final int curr = b[pos] & 0xff;
150         final int next = (pos < (b.length - 1)) ? (b[pos + 1] & 0xff) : -1;
151         if ((curr == 0xc2) && (next >= 0x80) && (next <= 0x9f)) {
152             return true;
153         }
154 
155         // Check if the current pos is the second byte in a UTF-8 C1
156         // control character (U+0080..U+009f).
157         final int prev = (pos > 0) ? (b[pos - 1] & 0xff) : -1;
158         return (prev == 0xc2) && (curr >= 0x80) && (curr <= 0x9f);
159     }
160 
161     /**
162      * Glue two byte arrays together into one
163      * 
164      * @param a the first byte array
165      * @param b the second byte array
166      * @return the whole
167      */
168     public static byte[] glue(@Nullable byte[] a, @Nullable byte[] b) {
169         if (a == null) {
170             return b;
171         }
172         if (b == null) {
173             return a;
174         }
175         return glue(a, 0, a.length - 1, b, 0, b.length - 1);
176     }
177 
178     /**
179      * Glue three byte arrays together into one
180      * 
181      * @param a the first byte array
182      * @param b the second byte array
183      * @param c the third byte array
184      * @return the whole
185      */
186     public static byte[] glue(@Nullable byte[] a, @Nullable byte[] b, @Nullable byte[] c) {
187         if (a == null) {
188             return glue(b, c);
189         }
190         if (b == null) {
191             return glue(a, c);
192         }
193         if (c == null) {
194             return glue(a, b);
195         }
196         return glue(a, 0, a.length - 1, b, 0, b.length - 1, c, 0, c.length - 1);
197     }
198 
199     /**
200      * Glue two byte arrays together into one
201      * 
202      * @param a the first byte array
203      * @param astart starting position in a
204      * @param aend ending position in a
205      * @param b the second byte array
206      * @param bstart starting position in b
207      * @param bend ending position in b
208      * @return the whole
209      */
210     @SuppressWarnings("InconsistentOverloads")
211     public static byte[] glue(byte[] a, int astart, int aend, byte[] b, int bstart, int bend) {
212         int alen = aend - astart + 1;
213         int blen = bend - bstart + 1;
214 
215         byte[] rslt = new byte[alen + blen];
216         System.arraycopy(a, astart, rslt, 0, alen);
217         System.arraycopy(b, bstart, rslt, alen, blen);
218         return rslt;
219     }
220 
221     /**
222      * Glue three byte arrays together into one
223      * 
224      * @param a the first byte array
225      * @param astart starting position in a
226      * @param aend ending position in a
227      * @param b the second byte array
228      * @param bstart starting position in b
229      * @param bend ending position in b
230      * @param c the third byte array
231      * @param cstart starting position in c
232      * @param cend ending position in c
233      * @return the whole
234      */
235     @SuppressWarnings("InconsistentOverloads")
236     public static byte[] glue(byte[] a, int astart, int aend, byte[] b, int bstart, int bend, byte[] c, int cstart, int cend) {
237         int alen = aend - astart + 1;
238         int blen = bend - bstart + 1;
239         int clen = cend - cstart + 1;
240 
241         byte[] rslt = new byte[alen + blen + clen];
242         System.arraycopy(a, astart, rslt, 0, alen);
243         System.arraycopy(b, bstart, rslt, alen, blen);
244         System.arraycopy(c, cstart, rslt, alen + blen, clen);
245         return rslt;
246     }
247 
248     /**
249      * Split a byte array at the specified position
250      * 
251      * @param a the byte array
252      * @param pos the split position (a[pos] goes to the second part)
253      */
254     public static List<byte[]> split(@Nullable byte[] a, int pos) {
255         List<byte[]> list = new ArrayList<>();
256         if (a != null && pos > 0 && pos <= a.length) {
257             byte[] part1 = new byte[pos];
258             byte[] part2 = new byte[a.length - pos];
259             System.arraycopy(a, 0, part1, 0, pos);
260             System.arraycopy(a, pos, part2, 0, part2.length);
261             list.add(part1);
262             list.add(part2);
263         } else {
264             // Just give back the original
265             list.add(a);
266         }
267         return list;
268     }
269 
270     /**
271      * Given a byte-array and a start offset, return a string of the bytes between the start position and a carriage return
272      * byte. In essence, this is grabbing a line of input where the byte array is composed of several lines of input.
273      * 
274      * @param data The byte array of input data.
275      * @param pos The initial start offset.
276      * @return A string created from the bytes found from the start offset to the carriage return byte.
277      */
278     public static String grabLine(byte[] data, int pos) {
279         String ret = null;
280         int eolnPos = -1;
281         for (int i = pos; i < data.length; i++) {
282             if (data[i] == '\n') {
283                 eolnPos = i;
284                 break;
285             }
286         }
287         if (eolnPos != -1) {
288             // String up to the found \n pos
289             ret = new String(data, pos, eolnPos - pos + 1);
290         } else {
291             // String to end of buffer
292             ret = new String(data, pos, data.length - pos);
293         }
294         return ret;
295     }
296 
297     /**
298      * Scans a byte array looking for non-printable values.
299      * 
300      * @param bytes the bytes to be scanned.
301      * @return whether or not there were non-printable values.
302      */
303     public static boolean hasNonPrintableValues(final byte[] bytes) {
304         boolean badCharacters = false;
305 
306         for (byte aByte : bytes) {
307             if (aByte < 9 || (aByte > 13 && aByte < 32)) {
308                 badCharacters = true;
309                 break;
310             }
311         }
312 
313         return badCharacters;
314     }
315 
316     /**
317      * Creates a hex string of a sha256 hash for a byte[].
318      * 
319      * @param bytes to be hashed
320      * @return the hex string of a sha256 hash of the bytes.
321      */
322     @Nullable
323     public static String sha256Bytes(final byte[] bytes) {
324         try {
325             final MessageDigest md = MessageDigest.getInstance("SHA-256");
326             final byte[] hash = md.digest(bytes);
327 
328             final StringBuilder hexString = new StringBuilder(2 * hash.length);
329             for (byte b : hash) {
330                 final String hex = Integer.toHexString(0xff & b);
331 
332                 if (hex.length() == 1) {
333                     hexString.append('0');
334                 }
335                 hexString.append(hex);
336             }
337             return hexString.toString();
338         } catch (NoSuchAlgorithmException e) {
339             return null;
340         }
341     }
342 
343     /**
344      * Check if the bytes contains a non-indexable characters
345      *
346      * @param utf8Bytes the bytes to be scanned
347      * @return whether there were non-indexable characters
348      */
349     public static boolean containsNonIndexableBytes(final byte[] utf8Bytes) {
350         // Wrap the byte array in a ByteArrayInputStream
351         final InputStream inputStream = new ByteArrayInputStream(utf8Bytes);
352         return containsNonIndexableBytes(inputStream);
353     }
354 
355     /**
356      * Check if the input stream contains a non-indexable characters
357      *
358      * @param inputStream the input stream to be scanned
359      * @return whether there were non-indexable characters
360      */
361     public static boolean containsNonIndexableBytes(final InputStream inputStream) {
362         // Create an InputStreamReader to read the bytes as characters
363         try (Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
364             int codePoint;
365             // Use the read() method of the InputStreamReader to read code points. The read() method automatically handles
366             // surrogate pairs, returning a single code point even for characters represented by multiple code units.
367             while ((codePoint = reader.read()) != -1) {
368                 // Check if the code point is indexable
369                 if (isNotIndexable(codePoint)) {
370                     return true;
371                 }
372             }
373             return false;
374         } catch (IOException e) {
375             return true;
376         }
377     }
378 
379     /**
380      * Check if the code point is a control character or surrogate pair
381      * <a href="https://en.wikipedia.org/wiki/Unicode_block">Unicode Block</a>
382      * <a href="https://www.unicode.org/charts/PDF/U0000.pdf">U0000</a>
383      * <a href="https://www.unicode.org/charts/PDF/U2000.pdf">U2000</a>
384      * <a href="https://www.unicode.org/charts/PDF/U3000.pdf">U3000</a>
385      * <a href="https://www.unicode.org/charts/PDF/UFE70.pdf">UFE70</a>
386      * <a href="https://www.unicode.org/charts/PDF/UFFF0.pdf">UFFF0</a>
387      *
388      * @param codepoint numerical value that maps to a specific character to check
389      * @return if code-point is a valid text character
390      */
391     private static boolean isNotIndexable(final int codepoint) {
392         return ('\u0000' <= codepoint && codepoint <= '\u0008')
393                 || ('\u000E' <= codepoint && codepoint <= '\u001F')
394                 || ('\u007F' <= codepoint && codepoint <= '\u009F')
395                 || ('\u2000' <= codepoint && codepoint <= '\u200F')
396                 || ('\u2028' <= codepoint && codepoint <= '\u202F')
397                 || ('\u205F' <= codepoint && codepoint <= '\u206F')
398                 || codepoint == '\u3000'
399                 || codepoint == '\uFEFF'
400                 || codepoint == '\uFFFD';
401     }
402 
403     /** This class is not meant to be instantiated. */
404     private ByteUtil() {}
405 }