ByteMatcher.java

package emissary.util.search;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nullable;

/**
 * This class provides some simple string matching functions on byte arrays.
 *
 * <p>
 * The matcher keeps a reference to the byte array it is given (no copy is made) and delegates pattern searches to a
 * {@link KeywordScanner}. Unless stated otherwise, {@code String} arguments are converted with
 * {@link String#getBytes()}, i.e. using the platform default charset; only {@link #resetData(String)} uses UTF-8.
 *
 * <p>
 * The ignore-case searches temporarily switch the shared scanner to case-insensitive mode, so a single instance should
 * not be used from several threads at once.
 */
public class ByteMatcher {

    /** The bytes being searched; this is the caller's array, not a copy. */
    @Nullable
    private byte[] mydata = null;

    /** Scanner that performs the actual pattern searches; created on the first {@code resetData} call. */
    @Nullable
    private KeywordScanner scanner = null;

    /** Value returned by the index methods when the pattern is not found. */
    public static final int NOTFOUND = -1;

    /**
     * Create a matcher over an empty byte array.
     */
    public ByteMatcher() {
        this(new byte[0]);
    }

    /**
     * Create a matcher over the bytes of a string.
     *
     * @param data text to match against, converted with the platform default charset
     */
    public ByteMatcher(String data) {
        this(data.getBytes());
    }

    /**
     * Create a matcher over a byte array.
     *
     * @param data bytes to match against (the reference is kept, not copied)
     */
    public ByteMatcher(byte[] data) {
        resetData(data);
    }

    /**
     * Reset the data to the UTF-8 bytes of a string.
     *
     * @param data text to match against
     */
    public void resetData(String data) {
        resetData(data, StandardCharsets.UTF_8);
    }

    /**
     * Reset the data to the bytes of a string in the named charset.
     *
     * @param data text to match against
     * @param charsetName name of the charset used to encode {@code data}; resolved with
     *        {@link Charset#forName(String)}
     */
    public void resetData(String data, String charsetName) {
        resetData(data, Charset.forName(charsetName));
    }

    /**
     * Reset the data to the bytes of a string in the given charset.
     *
     * @param data text to match against
     * @param charset charset used to encode {@code data}
     */
    public void resetData(String data, Charset charset) {
        resetData(data.getBytes(charset));
    }

    /**
     * Reset the byte array. Use of this method avoids having to instantiate a new ByteMatcher.
     *
     * @param data - bytes to match against
     */
    public void resetData(byte[] data) {
        this.mydata = data;
        if (null == this.scanner) {
            this.scanner = new KeywordScanner(data);
        } else {
            this.scanner.resetData(data);
        }
    }

    /**
     * Return a reference to the text we are working on
     *
     * @return the underlying array itself (not a copy), or {@code null} if the data was reset to {@code null}
     */
    @Nullable
    public byte[] getText() {
        return mydata;
    }

    /**
     * Return the length of the text
     *
     * @return number of bytes in the data, or 0 when there is no data
     */
    public int length() {
        return (mydata == null) ? 0 : mydata.length;
    }

    /**
     * Match pattern in the text
     *
     * @param pattern bytes to find
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexOf(byte[] pattern) {
        return indexOf(pattern, 0);
    }

    /**
     * Match pattern in the text
     *
     * @param pattern text to find, converted with the platform default charset
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexOf(String pattern) {
        return indexOf(pattern.getBytes(), 0);
    }

    /**
     * This method finds a pattern in the text and returns the offset
     *
     * @param pattern bytes to find
     * @param startOfs start index
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexOf(byte[] pattern, int startOfs) {
        if (mydata == null) {
            return NOTFOUND;
        }
        return indexOf(pattern, startOfs, mydata.length);
    }

    /**
     * Match pattern in the text beginning at startOfs
     *
     * @param pattern text to find, converted with the platform default charset
     * @param startOfs start index
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexOf(String pattern, int startOfs) {
        return indexOf(pattern.getBytes(), startOfs);
    }

    /**
     * This method finds a pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset
     *
     * @param pattern bytes to find
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return position
     */
    public int indexOf(byte[] pattern, int beginIndex, int endIndex) {

        // Impossible to find under these conditions
        if (cannotMatch(pattern, beginIndex, endIndex)) {
            return NOTFOUND;
        }

        // Delegate to the scanner (described in the original code as a Boyer-Moore scan).
        return scanner.indexOf(pattern, beginIndex, endIndex);
    }

    /**
     * Match pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset
     *
     * @param pattern text to find, converted with the platform default charset
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return position
     */
    public int indexOf(String pattern, int beginIndex, int endIndex) {
        return indexOf(pattern.getBytes(), beginIndex, endIndex);
    }

    /**
     * Match pattern in the text
     *
     * @param pattern bytes to find
     * @return list of positions
     */
    public List<Integer> listIndexOf(byte[] pattern) {
        return listIndexOf(pattern, 0);
    }

    /**
     * Match pattern in the text
     *
     * @param pattern text to find, converted with the platform default charset
     * @return list of positions
     */
    public List<Integer> listIndexOf(String pattern) {
        return listIndexOf(pattern.getBytes(), 0);
    }

    /**
     * This method finds a pattern in the text from {@code startOfs} and returns a list of offsets
     *
     * @param pattern bytes to find
     * @param startOfs start index
     * @return list of positions, empty when there is no data
     */
    public List<Integer> listIndexOf(byte[] pattern, int startOfs) {
        if (mydata == null) {
            return Collections.emptyList();
        }
        return listIndexOf(pattern, startOfs, mydata.length);
    }

    /**
     * Match pattern in the text beginning at {@code startOfs}
     *
     * @param pattern text to find, converted with the platform default charset
     * @param startOfs start index
     * @return list of positions
     */
    public List<Integer> listIndexOf(String pattern, int startOfs) {
        return listIndexOf(pattern.getBytes(), startOfs);
    }

    /**
     * This method finds a pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset list
     *
     * @param pattern bytes to find
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return list of positions
     */
    public List<Integer> listIndexOf(byte[] pattern, int beginIndex, int endIndex) {

        // Impossible to find under these conditions
        if (cannotMatch(pattern, beginIndex, endIndex)) {
            return Collections.emptyList();
        }

        return scanner.listIndexOf(pattern, beginIndex, endIndex);
    }

    /**
     * Match pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset list
     *
     * @param pattern text to find, converted with the platform default charset
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return list of positions
     */
    public List<Integer> listIndexOf(String pattern, int beginIndex, int endIndex) {
        return listIndexOf(pattern.getBytes(), beginIndex, endIndex);
    }

    /**
     * Sort of like libc's strcmp, find if pattern matches this at offset
     *
     * @param offset index in the data where the pattern must begin
     * @param pattern text to compare, converted with the platform default charset
     * @return true if every pattern byte equals the data byte at the corresponding position; false for a null pattern,
     *         a negative offset, or a pattern that would run past the end of the data
     */
    public boolean strcmp(int offset, @Nullable String pattern) {

        if (pattern == null) {
            return false;
        }

        final byte[] patternBytes = pattern.getBytes();
        if (patternBytes.length == 0) {
            // Nothing to compare: an empty pattern has always been reported as a match
            return true;
        }

        // Written as a subtraction so a huge offset cannot overflow the way offset + i could
        if (mydata == null || offset < 0 || patternBytes.length > mydata.length - offset) {
            return false;
        }

        for (int i = 0; i < patternBytes.length; i++) {
            if (mydata[offset + i] != patternBytes[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the specified byte
     *
     * @param i index of the byte
     * @return the byte at index {@code i}
     * @throws ArrayIndexOutOfBoundsException if {@code i} is negative or not less than the data length
     */
    public byte byteAt(int i) {

        final int dataLength = (mydata == null) ? 0 : mydata.length;
        if (i < 0 || i >= dataLength) {
            throw new ArrayIndexOutOfBoundsException("ByteMatcher.data(" + dataLength + ") : " + i);
        }

        return mydata[i];
    }

    /**
     * Return a slice
     *
     * @param start index to start
     * @param end index one past the end of desired range
     * @return array slice, or an empty array when the range is empty or not fully inside the data
     */
    public byte[] slice(int start, int end) {
        if (mydata == null || start < 0 || end <= start || end > mydata.length) {
            return new byte[0];
        }
        final byte[] slice = new byte[end - start];
        System.arraycopy(mydata, start, slice, 0, slice.length);
        return slice;
    }

    /**
     * StartsWith
     *
     * <p>
     * Each character of {@code s} is narrowed to its low byte before comparison (no charset encoding is applied), so
     * characters in the range 0-255 can be used to spell out raw byte values.
     *
     * @param s the prefix to test
     * @return true if the data is at least {@code s.length()} bytes long and begins with the low bytes of {@code s}
     */
    public boolean startsWith(String s) {
        final int prefixLength = s.length();
        if (mydata == null || mydata.length < prefixLength) {
            return false;
        }

        for (int i = 0; i < prefixLength; i++) {
            if (mydata[i] != (byte) s.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * This method finds a pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset ignoring
     * upper/lower case
     *
     * @param pattern bytes to find
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return position
     */
    public int indexIgnoreCase(byte[] pattern, int beginIndex, int endIndex) {

        // Impossible to find under these conditions
        if (cannotMatch(pattern, beginIndex, endIndex)) {
            return NOTFOUND;
        }

        // Switch the scanner to ignore case for this one search only.
        scanner.setCaseSensitive(false);
        try {
            return scanner.indexOf(pattern, beginIndex, endIndex);
        } finally {
            // Reset scanner to default state, even if the search threw.
            scanner.setCaseSensitive(true);
        }
    }

    /**
     * This method finds a pattern in the text and returns the offset ignoring upper/lower case
     *
     * @param pattern bytes to find
     * @param startOfs start index
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexIgnoreCase(byte[] pattern, int startOfs) {
        if (mydata == null) {
            return NOTFOUND;
        }
        return indexIgnoreCase(pattern, startOfs, mydata.length);
    }

    /**
     * Match pattern in the text ignoring upper/lower case
     *
     * @param pattern bytes to find
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexIgnoreCase(byte[] pattern) {
        return indexIgnoreCase(pattern, 0);
    }

    /**
     * Match pattern in the text ignoring upper/lower case
     *
     * @param pattern text to find, converted with the platform default charset
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexIgnoreCase(String pattern) {
        return indexIgnoreCase(pattern.getBytes(), 0);
    }

    /**
     * Match pattern in the text beginning at {@code startOfs} ignoring upper/lower case
     *
     * @param pattern text to find, converted with the platform default charset
     * @param startOfs start index
     * @return position of the match or {@link #NOTFOUND}
     */
    public int indexIgnoreCase(String pattern, int startOfs) {
        return indexIgnoreCase(pattern.getBytes(), startOfs);
    }

    /**
     * Match pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset ignoring upper/lower
     * case
     *
     * @param pattern text to find, converted with the platform default charset
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return position
     */
    public int indexIgnoreCase(String pattern, int beginIndex, int endIndex) {
        return indexIgnoreCase(pattern.getBytes(), beginIndex, endIndex);
    }

    /**
     * This method finds a pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset list
     * ignoring upper/lower case
     *
     * @param pattern bytes to find
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(byte[] pattern, int beginIndex, int endIndex) {

        // Impossible to find under these conditions
        if (cannotMatch(pattern, beginIndex, endIndex)) {
            return Collections.emptyList();
        }

        // Switch the scanner to ignore case for this one search only.
        scanner.setCaseSensitive(false);
        try {
            return scanner.listIndexOf(pattern, beginIndex, endIndex);
        } finally {
            // Reset scanner to default state, even if the search threw.
            scanner.setCaseSensitive(true);
        }
    }

    /**
     * This method finds a pattern in the text and returns the offset list ignoring upper/lower case
     *
     * @param pattern bytes to find
     * @param startOfs start index
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(byte[] pattern, int startOfs) {
        if (mydata == null) {
            return Collections.emptyList();
        }
        return indexListIgnoreCase(pattern, startOfs, mydata.length);
    }

    /**
     * Match pattern in the text and returns the offset list ignoring upper/lower case
     *
     * @param pattern bytes to find
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(byte[] pattern) {
        return indexListIgnoreCase(pattern, 0);
    }

    /**
     * Match pattern in the text from {@code beginIndex} to {@code endIndex} and returns the offset list ignoring
     * upper/lower case
     *
     * @param pattern text to find, converted with the platform default charset
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     *
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(String pattern, int beginIndex, int endIndex) {
        return indexListIgnoreCase(pattern.getBytes(), beginIndex, endIndex);
    }

    /**
     * Match pattern in the text from {@code startOfs} and returns the offset list ignoring upper/lower case
     *
     * @param pattern text to find, converted with the platform default charset
     * @param startOfs start index
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(String pattern, int startOfs) {
        return indexListIgnoreCase(pattern.getBytes(), startOfs);
    }

    /**
     * Match pattern in the text and returns the offset list ignoring upper/lower case
     *
     * @param pattern text to find, converted with the platform default charset
     * @return list of positions
     */
    public List<Integer> indexListIgnoreCase(String pattern) {
        return indexListIgnoreCase(pattern.getBytes(), 0);
    }

    /**
     * Find tags of the form "Key{token}Value" returning "Value" when "Key" is supplied. The value goes after the {token} to
     * the end of the line.
     *
     * @param key the key to look for, converted with the platform default charset
     * @param ofs index at which to start looking for the key
     * @param delim the token separating key and value, converted with the platform default charset
     * @return the bytes between the end of the delimiter and the end of the key's line decoded with the platform default
     *         charset, an empty string when the delimiter ends exactly at the end of the line, or {@code null} when the
     *         key or the delimiter is not found or the delimiter ends beyond the key's line
     */
    @Nullable
    public String getValue(String key, int ofs, String delim) {

        // Work with encoded lengths: the search is byte based, so String.length() would give
        // wrong offsets for keys or delimiters containing multi-byte characters.
        final byte[] keyBytes = key.getBytes();
        final int keypos = indexOf(keyBytes, ofs);
        if (keypos == NOTFOUND) {
            return null;
        }
        final int afterKey = keypos + keyBytes.length;

        // Locate the end of the line holding the key (first CR or LF, or end of data)
        int eolpos = afterKey;
        while (eolpos < mydata.length && mydata[eolpos] != '\n' && mydata[eolpos] != '\r') {
            eolpos++;
        }

        final byte[] delimBytes = delim.getBytes();
        final int delimpos = indexOf(delimBytes, afterKey);
        if (delimpos == NOTFOUND) {
            // Without this guard, -1 + delimiter length could coincide with eolpos and
            // yield an empty value for a delimiter that is not present at all.
            return null;
        }

        final int eodpos = delimpos + delimBytes.length;
        if (eodpos < eolpos) {
            return new String(mydata, eodpos, eolpos - eodpos);
        }
        if (eodpos == eolpos) {
            return "";
        }
        // The delimiter was only found past the end of the key's line
        return null;
    }

    /**
     * Find the value of a "Key=Value" tag, starting the key search at {@code ofs}.
     *
     * @param key the key to look for
     * @param ofs index at which to start looking for the key
     * @return the value, or {@code null} if it cannot be found; see {@link #getValue(String, int, String)}
     */
    @Nullable
    public String getValue(String key, int ofs) {
        return getValue(key, ofs, "=");
    }

    /**
     * Find the value of a "Key=Value" tag, searching from the start of the data.
     *
     * @param key the key to look for
     * @return the value, or {@code null} if it cannot be found; see {@link #getValue(String, int, String)}
     */
    @Nullable
    public String getValue(String key) {
        return getValue(key, 0, "=");
    }

    /**
     * Get the value of a S tag, given the value S values work like this: KEY: length data data tdata data more data
     * NEXTKEY: nextlength
     *
     * @param key the key to look for
     * @return the value bytes, or {@code null} if they cannot be extracted; see {@link #getSValue(String, int, int)}
     */
    @Nullable
    public byte[] getSValue(String key) {
        if (mydata == null) {
            return null;
        }
        return getSValue(key, 0, mydata.length);
    }

    /**
     * Get the value of an S tag: the key's line carries "KEY: length" and the value is the {@code length} bytes that
     * follow the newline ending that line.
     *
     * @param key the key to look for
     * @param ofs index at which to start looking for the key
     * @param limit index the key, the end of the key's line, and the end of the returned value must not exceed
     * @return the value bytes (truncated so they do not extend past {@code limit}, and without a trailing newline), or
     *         {@code null} when the key or its newline is missing or beyond {@code limit}, the length is absent, not a
     *         positive integer, or larger than the remaining data, or no room is left before {@code limit}
     */
    @Nullable
    public byte[] getSValue(String key, int ofs, int limit) {

        // Make sure the key exists
        final int keypos = indexOf(key, ofs);
        if (keypos == NOTFOUND || keypos > limit) {
            return null;
        }

        // The value starts right after the newline that ends the key's line
        final int newlinePos = indexOf("\n", keypos);
        if (newlinePos == NOTFOUND || newlinePos > limit) {
            // No newline means no value can follow the key line
            return null;
        }
        final int valpos = newlinePos + 1; // past the new line

        // Get the length out as a string
        final String strLength = getValue(key, ofs, ":");
        if (strLength == null) {
            return null;
        }

        // Turn the length into an int
        int length;
        try {
            length = Integer.parseInt(strLength.trim());
        } catch (NumberFormatException ignored) {
            // A length that is not a number means there is no usable value
            return null;
        }
        if (length <= 0) {
            return null;
        }

        // Make sure the declared length is legal (subtraction form cannot overflow)
        if (length > mydata.length - valpos) {
            return null;
        }

        // Dont let the value run past limit: keep only the part before it
        if (length > limit - valpos) {
            length = limit - valpos;
            if (length <= 0) {
                return null;
            }
        }

        // Take off a new line if that's the last char
        if (mydata[valpos + length - 1] == '\n') {
            length--;
        }

        // Return the bytes of the data
        final byte[] value = new byte[length];
        System.arraycopy(mydata, valpos, value, 0, length);
        return value;
    }

    /**
     * Describe this matcher as the class, the data length in brackets, and the data decoded with the platform default
     * charset ({@code null} when there is no data).
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(this.getClass());
        sb.append("[");
        sb.append(this.length());
        sb.append("] : ");
        sb.append(this.mydata == null ? "null" : new String(this.mydata));
        return sb.toString();
    }

    /**
     * Shared precondition for the scanner-backed searches: true when a search cannot succeed.
     *
     * @param pattern bytes to find
     * @param beginIndex start index
     * @param endIndex the index to stop searching at, exclusive
     * @return true if there is no data, scanner or pattern, the pattern cannot fit at {@code beginIndex}, or
     *         {@code endIndex} lies beyond the data
     */
    private boolean cannotMatch(@Nullable byte[] pattern, int beginIndex, int endIndex) {
        return mydata == null
                || scanner == null
                || pattern == null
                || beginIndex > (mydata.length - pattern.length)
                || endIndex > mydata.length;
    }
}