MagicNumberUtil.java

package emissary.util;

import emissary.util.magic.MagicNumber;
import emissary.util.magic.MagicNumberFactory;
import emissary.util.shell.Executrix;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * Magic entry rules when using the Java utility, MagicNumberUtil
 *
 * A. Examples:
 *
 * Java ByteCode From Larry Schwimmer (schwim@cs.stanford.edu) 0 belong 0xcafebabe compiled Java class data, >6
 * beshort x version %d. >4 beshort x \b%d
 *
 * The entries must have 4 columns where the first three are delimited by blank space as tabs, or spaces, or both and
 * the remaining columns will be stored as the description. Since spaces are also delimiters - if the value column
 * (third column) requires a space then it should be escaped.
 *
 * B. Offset Column 1. A decimal, hex, or octal value preceded or not preceded by '>' 2. Decimal: n* - if the
 * occurrences is > 1, then not preceded by '0' 3. Hex: 0xn* 4. Octal: 0n* 5. Offset values in the format '(n.s+32)'
 * are ignored. These only occurred in the continuations
 *
 * C. Data Type Column 1. BYTE, SHORT, LONG, STRING, BESHORT, BELONG, LESHORT, LELONG 2. LEDATE, BEDATE, and DATE are
 * not supported 3. Masking: If the data type is followed by a mask value in decimal, octal or hex and delimited by
 * '&' then the value column will be stored as the product of the masking. The mask value cannot exceed the data
 * type length.
 *
 * D. Value columns 1. String, Byte, Short, or Long - 1, 2, or 4 byte values - or any length value for the string type.
 * 2. String values can be escaped. a. Example "\0\40\320\3200\4text\ \7\x40\r\t\" parsed as: "0-32-208-208-0-4-text-
 * -7-64- " with the dashes removed.
 *
 * 1. Escaping number values Numbers can be of length up to three octal or two hex and can also be terminated by
 * non-digits and finally must be less than 256. These numeric values are substituted into their respective byte
 * positions.
 *
 * - 3200 will be evaluated as '320' octal and '0' string. - 0 as 0 octal - 40 as 40 octal - x40 as 40 hex which is 64 -
 * 4text as 4 and succeeded by the characters 't' 'e' 'x' 't'
 *
 * 2. Escaping characters including spaces - Spaces must be escaped - otherwise they'll be tokenized into the next
 * column - 4\ 4\4 results in [char 4, char space, char 4, integer 4] - Ascii values 8-15 can be escaped as: \b\t\n\r
 * etc... see man page for ascii - trailing slashes will result in the placement of a trailing space
 *
 * 3. Substitution Example >4 beshort x \bversion %d.%c Substitution is allowed for continuations only. In this case,
 * the short byte array will be sampled from the document at offset 4 and length 2. This stored value can be substituted
 * in the description field where %c or %s will convert the number into a unicode character and %d, %ld and other
 * numeric data types will instead substitute the numeric value.
 *
 * In the above, if value 'x' at offset 4 equals 0101 octal, then the substitutions will be a decimal value of 65 and
 * the character value of 'A' resulting in: "\bversion 65.A"
 *
 * E. Description The description is comprised of all remaining columns once the first three have been discovered. They
 * can be blank in continuations since continuations may depend upon the successful testing of preceding continuations.
 * In other words:
 *
 * 0 long 0xcafebabe java binary >4 byte x version %d
 *
 * can be re-written as
 *
 * 0 short 0xcafe java >4 byte 0xba >>6 byte 0xbe \b\bbinary >>>4 byte x version %d
 *
 * where the continuations will only occur upon the completion of '>4 byte 0xba'
 *
 * 1. The descriptions can be escaped with a '\b' 2. Each continuation is prefixed with a space when added to a
 * description. To avoid this or remove spaces use the '\b' backspace and it will perform a backspace function or trim
 * previous character. 3. See Value column for substitution rules
 */

public final class MagicNumberUtil {

    private static final Logger log = LoggerFactory.getLogger(MagicNumberUtil.class);

    /** The magic number instances accumulated by the {@code load} methods, in load order. */
    private final List<MagicNumber> magicNumbers = new ArrayList<>();

    /**
     * Log flag for storing parse errors - when off they will just be discarded. Switching this on will allow erroneous
     * entries to be logged and can be retrieved using the method getErrorLog to find out which entries had parsing errors.
     * Using the magic file shipped with version unix file 3.39 only three/four primary entries were unsupported - these
     * had to do with signed data types such as ubelong. Or the value was larger than the specified data type which
     * occurred once. Otherwise, remaining errors were in continuations - mainly when the offset value was in the form of
     * n.s+32 where 'n' is a decimal value and 's' could not be determined.
     */
    private boolean logErrors = false;

    /**
     * Log data structure for continuations. Maps entries with depth 0 with a List of continuation entries containing the
     * errors
     */
    private final Map<String, List<String>> extErrorMap = new TreeMap<>();

    /**
     * Log data structure for entries with a depth of '0' - these are the important entries. Just maintains a simple list of
     * these entries
     */
    private final List<String> errorList = new ArrayList<>();

    /**
     * Creates an instance with no magic numbers loaded and error logging switched off.
     */
    public MagicNumberUtil() {}

    /**
     * Main testing - plug in the magic file as the first arg and the target file to be examined as the second. The magic
     * file is loaded and the description found for the target file is logged.
     *
     * Usage: java xxx.MagicNumberUtil [magic config file absolute path] [target file to be id'd]
     *
     * @param args the magic config file path followed by the target file path
     */
    public static void main(final String[] args) {
        if (args.length < 2) {
            log.info("Usage: java xxx.MagicNumberUtil [magic config file absolute path] [target file to be id'd]");
            // nothing can be done without both arguments; continuing would only index past the end of args
            return;
        }

        final MagicNumberUtil util = new MagicNumberUtil();
        try {
            // make sure the magic file exists
            final File magicFile = new File(args[0]);
            if (!magicFile.exists()) {
                log.info("Could not find the magic config file at: {}", magicFile.getAbsolutePath());
                return;
            }
            // make sure the target file can be found as well
            final File target = new File(args[1]);
            if (!target.exists()) {
                log.info("Could not find the target file at: {}", target.getAbsolutePath());
                return;
            }

            // load the magic numbers and identify the target
            util.load(magicFile);
            log.info("{} identified as: {}", target.getAbsolutePath(), util.describe(target));

        } catch (Exception e) {
            // top-level boundary: report anything that went wrong instead of letting it escape main
            log.error("Error in main", e);
        }
        // if error logging is enabled on the 'logErrors' flag then this will print out the entry and continuation
        // parsing errors
        if (util.logErrors) {
            log.error(util.getErrorLog());
        }
    }

    /**
     * Switches collection of parse errors on or off for subsequent {@code load} calls on this instance.
     *
     * @param logErr true to collect parse errors so they can be reported by {@link #getErrorLog()}
     */
    public void setErrorLogging(final boolean logErr) {
        this.logErrors = logErr;
    }

    /**
     * Input a byte array sample and it will be compared against the magic numbers loaded into this instance, in load
     * order. The first non-empty description wins.
     *
     * @param data a byte[]
     * @return {@link String} representing matching description plus matching continuation descriptions, or null when no
     *         loaded entry produces a non-empty description (which includes the case where nothing has been loaded).
     * @see #load(java.io.File)
     * @see #load(byte[])
     */
    public String describe(final byte[] data) {
        log.debug("Checking against {} magic items", this.magicNumbers.size());
        return firstDescription(this.magicNumbers, data);
    }

    /**
     * Reads the target file and compares its content against the magic numbers loaded into this instance.
     *
     * @param target a java.io.File
     * @return A string representing matching description plus matching continuation descriptions or null.
     * @throws IOException If the target file does not exist or a security error prevents checking it.
     * @see #load(java.io.File)
     * @see #load(byte[])
     */
    public String describe(final File target) throws IOException {
        try {
            if (!target.exists()) {
                throw new IOException("Target file not found at: " + target.getAbsolutePath());
            }
        } catch (SecurityException se) {
            // keep the original exception as the cause so the stack trace is not lost
            throw new IOException("Security Exception reading file: " + se.getMessage(), se);
        }
        return describe(Executrix.readDataFromFile(target.getAbsolutePath()));
    }

    /**
     * Do not load magic file globally and do not compare against the global magic number list and instead compare target
     * against the specified magic file. The magic file will be read/parsed each time as the comparative file. Useful for
     * debugging or if certain files can be narrowed down to a smaller magic file list improving id performance.
     *
     * @param target a java.io.File specifying the file to be id'd
     * @param magicConfig the magic file containing the magic number entries to use
     * @return {@link String} representing the id description or null
     * @throws IOException If either file does not exist or a security error prevents checking them.
     */
    public static String describe(final File target, final File magicConfig) throws IOException {
        try {
            if (!target.exists()) {
                throw new IOException("Target file not found at: " + target.getAbsolutePath());
            } else if (!magicConfig.exists()) {
                throw new IOException("Magic config file not found at: " + magicConfig.getAbsolutePath());
            }
        } catch (SecurityException se) {
            throw new IOException("Security Exception reading file: " + se.getMessage(), se);
        }

        return describe(Executrix.readDataFromFile(target.getAbsolutePath()), magicConfig);
    }

    /**
     * Do not load magic file globally and do not compare against the global magic number list and compare target against
     * the specified magic file instead. The magic file will be read/parsed each time as the comparative file. Useful for
     * debugging or if certain files can be narrowed down to a smaller magic file list improving id performance. Parse
     * errors in the magic file are not collected by this method.
     *
     * @param sample a byte[] containing the data to be id'd
     * @param magicConfig the magic file containing the magic number entries to use
     * @return {@link String} representing the first non-empty id description or null
     * @throws IOException If the magic file does not exist or a security error prevents checking it.
     */
    public static String describe(final byte[] sample, final File magicConfig) throws IOException {
        try {
            if (!magicConfig.exists()) {
                throw new IOException("Magic config file not found at: " + magicConfig.getAbsolutePath());
            }
        } catch (SecurityException se) {
            throw new IOException("Security Exception reading file: " + se.getMessage(), se);
        }

        final List<MagicNumber> magicNumberList =
                MagicNumberFactory.buildMagicNumberList(Executrix.readDataFromFile(magicConfig.getAbsolutePath()), null, null);

        // same selection rule as the instance describe(byte[]) so both entry points agree on what a match is
        return firstDescription(magicNumberList, sample);
    }

    /**
     * Walks the candidates in order and returns the first description that is neither null nor empty.
     *
     * @param candidates the magic numbers to test, in priority order
     * @param data the sample to test them against
     * @return the first non-empty description, or null when no candidate produces one
     */
    private static String firstDescription(final List<MagicNumber> candidates, final byte[] data) {
        for (final MagicNumber item : candidates) {
            log.debug("Checking magic item {}", item);
            final String description = item.describe(data);
            if (description != null && !description.isEmpty()) {
                return description;
            }
        }
        return null;
    }

    /**
     * Load the magic number list globally. Entries are appended to any that were loaded earlier.
     *
     * @param config the java.io.File pointing to the magic file
     * @exception IOException if the config file does not exist or if a security access error occurs
     */
    public void load(final File config) throws IOException {
        load(config, false);
    }

    /**
     * Load the magic number list globally. Entries are appended to any that were loaded earlier.
     *
     * @param config the java.io.File pointing to the magic file
     * @param swallowParseException should we swallow Ignorable ParseException or bubble them up
     * @exception IOException if the config file does not exist or if a security access error occurs
     */
    public void load(final File config, final boolean swallowParseException) throws IOException {
        try {
            if (!config.exists()) {
                throw new IOException("File not found");
            }
        } catch (SecurityException se) {
            throw new IOException("Security Exception: " + se.getMessage(), se);
        }

        // the error collections are only handed to the factory when error logging is switched on
        final List<String> mErrorList = this.logErrors ? this.errorList : null;
        final Map<String, List<String>> mExtErrorMap = this.logErrors ? this.extErrorMap : null;
        this.magicNumbers.addAll(MagicNumberFactory.buildMagicNumberList(Executrix.readDataFromFile(config.getAbsolutePath()), mErrorList,
                mExtErrorMap, swallowParseException));
    }

    /**
     * Load the magic number list globally. Entries are appended to any that were loaded earlier.
     *
     * @param configData the byte[] containing the magic number entry data
     */
    public void load(final byte[] configData) {
        // the error collections are only handed to the factory when error logging is switched on
        final List<String> mErrorList = this.logErrors ? this.errorList : null;
        final Map<String, List<String>> mExtErrorMap = this.logErrors ? this.extErrorMap : null;
        this.magicNumbers.addAll(MagicNumberFactory.buildMagicNumberList(configData, mErrorList, mExtErrorMap));
    }

    /**
     * @return the number of magic number entries currently loaded into this instance
     */
    public int size() {
        return this.magicNumbers.size();
    }

    /**
     * Builds the parse error report for everything loaded into this instance.
     *
     * @return the report text, or an empty string when error logging is switched off
     */
    public String getErrorLog() {
        if (!this.logErrors) {
            return "";
        }
        return getErrorLog(this.magicNumbers, this.errorList, this.extErrorMap);
    }

    /**
     * Summarizes the outcome of parsing a magic file: a count section followed by the list of failed depth-0 entries and
     * then, for each entry that parsed but had failing continuations, the entry with its failed continuations.
     *
     * @param magicNumberList the successfully parsed magic numbers
     * @param zeroDepthErrorList the depth-0 entries that failed to parse
     * @param continuationErrorMap depth-0 entries mapped to their continuation entries that failed to parse
     * @return the formatted report
     */
    public String getErrorLog(final List<MagicNumber> magicNumberList, final List<String> zeroDepthErrorList,
            final Map<String, List<String>> continuationErrorMap) {
        final StringBuilder sb = new StringBuilder();
        final String lineBreak = "\n###########################################################";
        sb.append(lineBreak);
        sb.append("\nSUMMARY");
        sb.append(lineBreak);
        sb.append("\nSUCCESSFUL ENTRIES.................................................. ");
        // entries with failed continuations are reported separately as partially successful
        sb.append(magicNumberList.size() - continuationErrorMap.size());
        sb.append("\nFAILED ENTRIES...................................................... ");
        sb.append(zeroDepthErrorList.size());
        sb.append("\nPARTIALLY SUCCESSFUL ENTRIES (failed on some child continuations)... ");
        sb.append(continuationErrorMap.size());
        sb.append('\n');
        sb.append(lineBreak);
        sb.append("\nFAILED ENTRIES (failed on some continuations)\n\n");

        for (final String err : zeroDepthErrorList) {
            sb.append('\n');
            sb.append("ENTRY (STATUS:FAILED): ");
            sb.append(err);
        }

        sb.append('\n');
        sb.append(lineBreak);
        sb.append("\nPARTIALLY SUCCESSFUL ENTRIES (failed on some extensions)\n\n");

        for (final Map.Entry<String, List<String>> entry : continuationErrorMap.entrySet()) {
            sb.append('\n');
            sb.append("MAIN ENTRY (STATUS:SUCCESSFUL): ");
            sb.append(entry.getKey());

            for (final String extValue : entry.getValue()) {
                sb.append("\n\tCONTINUATION (STATUS:FAILED): ");
                sb.append(extValue);
            }
        }

        return sb.toString();
    }
}