View Javadoc
1   package emissary.util;
2   
3   import emissary.util.magic.MagicNumber;
4   import emissary.util.magic.MagicNumberFactory;
5   import emissary.util.shell.Executrix;
6   
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.File;
11  import java.io.IOException;
12  import java.util.ArrayList;
13  import java.util.List;
14  import java.util.Map;
15  import java.util.TreeMap;
16  
17  /**
18   * Magic entry rules when using the Java utility, MagicNumberUtil
19   *
20   * A. Examples:
21   *
22   * Java ByteCode From Larry Schwimmer (schwim@cs.stanford.edu) 0 belong 0xcafebabe compiled Java class data, >6
23   * beshort x version %d. >4 beshort x \b%d
24   *
25   * The entries must have 4 columns where the first three are delimited by blank space as tabs, or spaces, or both and
26   * the remaining columns will be stored as the description. Since spaces are also delimiters - if the value column
27   * (third column) requires a space then it should be escaped.
28   *
29   * B. Offset Column 1. A decimal, hex, or octal value preceded or not preceded by '>' 2. Decimal: n* - if the
30   * occurrences is > 1, then not preceded by '0' 3. Hex: 0xn* 4. Octal: 0n* 5. Offset values in the format '(n.s+32)'
31   * are ignored. These only occurred in the continuations
32   *
33   * C. Data Type Column 1. BYTE, SHORT, LONG, STRING, BESHORT, BELONG, LESHORT, LELONG 2. LEDATE, BEDATE, and DATE are
34   * not supported 3. Masking: If the data type is followed by a mask value in decimal, octal or hex and delimited by
35   * '&' then the value column will be stored as the product of the masking. The mask value cannot exceed the data
36   * type length.
37   *
38   * D. Value columns 1. String, Byte, Short, or Long - 1, 2, or 4 byte values - or any length value for the string type.
39   * 2. String values can be escaped. a. Example "\0\40\320\3200\4text\ \7\x40\r\t\" parsed as: "0-32-208-208-0-4-text-
40   * -7-64- " with the dashes removed.
41   *
42   * 1. Escaping number values Numbers can be of length up to three octal or two hex and can also be terminated by
43   * non-digits and finally must be less than 256. These numeric values are substituted into their respective byte
44   * positions.
45   *
46   * - 3200 will be evaluated as '320' octal and '0' string. - 0 as 0 octal - 40 as 40 octal - x40 as 40 hex which is 64 -
47   * 4text as 4 and succeeded by the characters 't' 'e' 'x' 't'
48   *
49   * 2. Escaping characters including spaces - Spaces must be escaped - otherwise they'll be tokenized into the next
50   * column - 4\ 4\4 results in [char 4, char space, char 4, integer 4] - Ascii values 8-15 can be escaped as: \b\t\n\r
51   * etc... see man page for ascii - trailing slashes will result in the placement of a trailing space
52   *
53   * 3. Substitution Example >4 beshort x \bversion %d.%c Substitution is allowed for continuations only. In this case,
54   * the short byte array will be sampled from the document at offset 4 and length 2. This stored value can be substituted
55   * in the description field where %c or %s will convert the number into a Unicode character and %d, %ld and
56   * other numeric data types will instead substitute the numeric value.
57   *
58   * In the above, if value 'x' at offset 4 equals 0101 octal, then the substitutions will be a decimal value of 65 and
59   * the character value of 'A' resulting in: "\bversion 65.A"
60   *
61   * E. Description The description is comprised of all remaining columns once the first three have been discovered. They
62   * can be blank in continuations since continuations may depend upon the successful testing of preceding continuations.
63   * In other words:
64   *
65   * 0 long 0xcafebabe java binary >4 byte x version %d
66   *
67   * can be re-written as
68   *
69   * 0 short 0xcafe java >4 byte 0xba >>6 byte 0xbe \b\bbinary >>>4 byte x version %d
70   *
71   * where the continuations will only occur upon the completion of '>4 byte 0xba'
72   *
73   * 1. The descriptions can be escaped with a '\b' 2. Each continuation is prefixed with a space when added to a
74   * description. To avoid this or remove spaces use the '\b' backspace and it will perform a backspace function or trim
75   * previous character. 3. See Value column for substitution rules
76   */
77  
78  public final class MagicNumberUtil {
79  
80      private static final Logger log = LoggerFactory.getLogger(MagicNumberUtil.class);
81  
82      /** The magic number instances */
83      private final List<MagicNumber> magicNumbers = new ArrayList<>();
84  
85      /**
86       * Log flag for storing parse errors - they will just be discarded. Switching this on will allow erroneous entries to be
87       * logged and can be retrieved using the method getErrorLog to find out which entries had parsing errors. Using the
88       * magic file shipped with version unix file 3.39 only three/four primary entries were unsupported - these had to do
89       * with signed data types such as ubelong. Or the value was larger then the specified data type which occurred once.
90       * Otherwise, remaining errors were in continuations - mainly when the offset value was in the form of n.s+32 where 'n'
91       * is a decimal value and 's' could not be determined.
92       */
93      private boolean logErrors = false;
94  
95      /**
96       * Log data structure for continuations. Maps entries with depth 0 with a List of continuation entries containing the
97       * errors
98       */
99      private final Map<String, List<String>> extErrorMap = new TreeMap<>();
100 
101     /**
102      * Log data structure for entries with a depth of '0' - these are the important entries. Just maintains a simple list of
103      * these entries
104      */
105     private final List<String> errorList = new ArrayList<>();
106 
107     /**
108      * Private Constructor
109      */
110     public MagicNumberUtil() {}
111 
112     /**
113      * Main testing - plug in the magic file as the first arg and the target file to be examined as the second.
114      *
115      * Usage: java xxx.MagicNumberUtil [magic config file absolute path] [target file to be id'd]
116      */
117     public static void main(final String[] args) {
118         if (args.length < 2) {
119             log.info("Usage: java xxx.MagicNumberUtil [magic config file absolute path] [target file to be id'd]");
120         }
121 
122         final MagicNumberUtil util = new MagicNumberUtil();
123         try {
124             // make sure the magic file exists
125             final File magicFile = new File(args[0]);
126             if (!magicFile.exists()) {
127                 log.info("Could not find the magic config file at: {}", magicFile.getAbsolutePath());
128                 System.exit(0);
129             }
130             // make sure the target file can be found as well
131             final File target = new File(args[1]);
132             if (!target.exists()) {
133                 log.info("Could not find the target file at: {}", target.getAbsolutePath());
134             }
135 
136             // load the magic numbers
137             util.load(new File(args[0]));
138 
139         } catch (Exception e) {
140             log.error("Error in main", e);
141         }
142         // if error logging is enabled on the 'logErrors' flag then this will print out the entry and continuation
143         // parsing errors
144         if (util.logErrors) {
145             log.error(util.getErrorLog());
146         }
147     }
148 
149     public void setErrorLogging(final boolean logErr) {
150         this.logErrors = logErr;
151     }
152 
153     /**
154      * Input a byte array sample and it will be compared against the global magic number list. Descriptions for matching
155      * entries inclusive of continuations.
156      *
157      * @param data a byte[]
158      * @return {@link String} representing matching description plus matching continuation descriptions or null.
159      * @throws RuntimeException If the magic file has not been loaded globally using the load methods.
160      * @see #load(java.io.File)
161      * @see #load(byte[])
162      */
163     public String describe(final byte[] data) {
164         log.debug("Checking against {} magic items", this.magicNumbers.size());
165         String description = null;
166         for (final MagicNumber item : this.magicNumbers) {
167             log.debug("Checking magic item {}", item);
168             description = item.describe(data);
169             if (description != null && !description.isEmpty()) {
170                 break;
171             }
172         }
173         return description;
174     }
175 
176     /**
177      * Input a byte array sample and it will be compared against the global magic number list. Descriptions for matching
178      * entries inclusive of continuations provided.
179      *
180      * @param target data a java.io.File
181      * @return A string representing matching description plus matching continuation descriptions or null.
182      * @throws RuntimeException If the magic file has not been loaded globally using the load methods.
183      * @throws IOException If a read error occurs loading the target file.
184      * @see #load(java.io.File)
185      * @see #load(byte[])
186      */
187     public String describe(final File target) throws IOException {
188         try {
189             if (!target.exists()) {
190                 throw new IOException("Target file not found at: " + target.getAbsolutePath());
191             }
192         } catch (SecurityException se) {
193             throw new IOException("Security Exception reading file: " + se.getMessage());
194         }
195         return describe(Executrix.readDataFromFile(target.getAbsolutePath()));
196     }
197 
198     /**
199      * Do not load magic file globally and do not compare against the global magic number list and instead compare target
200      * against the specified magic file. The magic file will be read/parsed each time as the comparative file. Useful for
201      * debugging or if certain files can be narrowed down to a smaller magic file list improving id performance.
202      *
203      * @param target a java.io.File specifying the file to be id'd
204      * @param magicConfig the magic file containing the magic number entries to use
205      * @return {@link String} representing the id description or null
206      * @throws IOException If an IO error occurs while reading either file.
207      */
208     public static String describe(final File target, final File magicConfig) throws IOException {
209         try {
210             if (!target.exists()) {
211                 throw new IOException("Target file not found at: " + target.getAbsolutePath());
212             } else if (!magicConfig.exists()) {
213                 throw new IOException("Magic config file not found at: " + magicConfig.getAbsolutePath());
214             }
215         } catch (SecurityException se) {
216             throw new IOException("Security Exception reading file: " + se.getMessage());
217         }
218 
219         return describe(Executrix.readDataFromFile(target.getAbsolutePath()), magicConfig);
220     }
221 
222     /**
223      * Do not load magic file globally and do not compare against the global magic number list and compare target against
224      * the specified magic file instead. The magic file will be read/parsed each time as the comparative file. Useful for
225      * debugging or if certain files can be narrowed down to a smaller magic file list improving id performance.
226      *
227      * @param sample a byte[] containing the data to be id'd
228      * @param magicConfig the magic file containing the magic number entries to use
229      * @return {@link String} representing the id description or null
230      * @throws IOException If an IO error occurs while reading either file.
231      */
232     public static String describe(final byte[] sample, final File magicConfig) throws IOException {
233         try {
234             if (!magicConfig.exists()) {
235                 throw new IOException("Magic config file not found at: " + magicConfig.getAbsolutePath());
236             }
237         } catch (SecurityException se) {
238             throw new IOException("Security Exception reading file: " + se.getMessage());
239         }
240 
241         final List<MagicNumber> magicNumberList =
242                 MagicNumberFactory.buildMagicNumberList(Executrix.readDataFromFile(magicConfig.getAbsolutePath()), null, null);
243 
244         String description = null;
245         for (final MagicNumber item : magicNumberList) {
246             description = item.describe(sample);
247             if (description != null) {
248                 break;
249             }
250         }
251         return description;
252     }
253 
254     /**
255      * Load the magic number list globally.
256      *
257      * @param config the java.io.File pointing to the magic file
258      * @exception IOException if one occurs while reading the config file or if a security access error occurs
259      */
260     public void load(final File config) throws IOException {
261         load(config, false);
262     }
263 
264     /**
265      * Load the magic number list globally.
266      *
267      * @param config the java.io.File pointing to the magic file
268      * @param swallowParseException should we swallow Ignorable ParseException or bubble them up
269      * @exception IOException if one occurs while reading the config file or if a security access error occurs
270      */
271     public void load(final File config, final boolean swallowParseException) throws IOException {
272         try {
273             if (!config.exists()) {
274                 throw new IOException("File not found");
275             }
276         } catch (SecurityException se) {
277             throw new IOException("Security Exception: " + se.getMessage());
278         }
279 
280         List<String> mErrorList = null;
281         Map<String, List<String>> mExtErrorMap = null;
282         if (this.logErrors) {
283             mErrorList = this.errorList;
284             mExtErrorMap = this.extErrorMap;
285         }
286         this.magicNumbers.addAll(MagicNumberFactory.buildMagicNumberList(Executrix.readDataFromFile(config.getAbsolutePath()), mErrorList,
287                 mExtErrorMap, swallowParseException));
288     }
289 
290     /**
291      * Load the magic number list globally.
292      *
293      * @param configData the byte[] containing the the magic number entry data
294      */
295     public void load(final byte[] configData) {
296         List<String> mErrorList = null;
297         Map<String, List<String>> mExtErrorMap = null;
298         if (this.logErrors) {
299             mErrorList = this.errorList;
300             mExtErrorMap = this.extErrorMap;
301         }
302         this.magicNumbers.addAll(MagicNumberFactory.buildMagicNumberList(configData, mErrorList, mExtErrorMap));
303     }
304 
305     public int size() {
306         return this.magicNumbers.size();
307     }
308 
309     public String getErrorLog() {
310         if (!this.logErrors) {
311             return "";
312         }
313         return getErrorLog(this.magicNumbers, this.errorList, this.extErrorMap);
314     }
315 
316     /**
317      * Summarizes
318      */
319     public String getErrorLog(final List<MagicNumber> magicNumberList, final List<String> zeroDepthErrorList,
320             final Map<String, List<String>> continuationErrorMap) {
321         final StringBuilder sb = new StringBuilder();
322         final String lineBreak = "\n###########################################################";
323         sb.append(lineBreak);
324         sb.append("\nSUMMARY");
325         sb.append(lineBreak);
326         sb.append("\nSUCCESSFUL ENTRIES.................................................. ");
327         sb.append(magicNumberList.size() - continuationErrorMap.size());
328         sb.append("\nFAILED ENTRIES...................................................... ");
329         sb.append(zeroDepthErrorList.size());
330         sb.append("\nPARTIALLY SUCCESSFUL ENTRIES (failed on some child continuations)... ");
331         sb.append(continuationErrorMap.size());
332         sb.append('\n');
333         sb.append(lineBreak);
334         sb.append("\nFAILED ENTRIES (failed on some continuations)\n\n");
335 
336         for (final String err : zeroDepthErrorList) {
337             sb.append('\n');
338             sb.append("ENTRY (STATUS:FAILED): ");
339             sb.append(err);
340         }
341 
342         sb.append('\n');
343         sb.append(lineBreak);
344         sb.append("\nPARTIALLY SUCCESSFUL ENTRIES (failed on some extensions)\n\n");
345 
346         for (final String entry : continuationErrorMap.keySet()) {
347             sb.append('\n');
348             sb.append("MAIN ENTRY (STATUS:SUCCESSFUL): ");
349             sb.append(entry);
350 
351             for (final String extValue : continuationErrorMap.get(entry)) {
352                 sb.append("\n\tCONTINUATION (STATUS:FAILED): ");
353                 sb.append(extValue);
354             }
355         }
356 
357         return sb.toString();
358     }
359 }