LineTokenizer.java

package emissary.util;

import java.nio.charset.Charset;
import javax.annotation.Nullable;

/**
 * Used the default way, this is equivalent to StringTokenizer st = new StringTokenizer(new String(theData),"\n",false);
 * except that a token is returned for blank lines as well. There seems no way to tell the StringTokenizer to do that.
 */
public class LineTokenizer {

    protected int previousIndex = -1;
    protected int index = 0;
    protected byte delim = (byte) '\n';
    protected int tokenCount = 0;
    protected byte[] data;
    @Nullable
    protected Charset charset = Charset.forName("8859_1");

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     */
    public LineTokenizer(byte[] theData) {
        this(theData, (byte) ('\n' & 0xff));
    }

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     * @param delim the delimiter to mark off lines
     */
    public LineTokenizer(byte[] theData, byte delim) {
        this.delim = delim;
        data = new byte[theData.length];

        // Count delimiter tokens in data
        if (data.length > 0) {
            for (int i = 0; i < theData.length; i++) {
                data[i] = theData[i];
                if (data[i] == delim) {
                    tokenCount++;
                }
            }

            // Trailing portion with no trailing delim
            if (data[data.length - 1] != delim) {
                tokenCount++;
            }
        }
    }

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     * @param delim the delimiter to mark off lines
     * @param charset the character set to use the outputting tokens as strings
     */
    public LineTokenizer(byte[] theData, byte delim, @Nullable String charset) {
        this(theData, delim);
        this.charset = (charset == null ? null : Charset.forName(charset));
    }

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     * @param delim the delimiter to mark off lines
     * @param charset the character set to use the outputting tokens as strings
     */
    public LineTokenizer(byte[] theData, byte delim, Charset charset) {
        this(theData, delim);
        this.charset = charset;
    }

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     * @param charset the character set to use the outputting tokens as strings
     */
    public LineTokenizer(byte[] theData, @Nullable String charset) {
        this(theData);
        this.charset = (charset == null ? null : Charset.forName(charset));
    }

    /**
     * Create a line tokenizer to operate on some data
     * 
     * @param theData byte array of data
     * @param charset the character set to use the outputting tokens as strings
     */
    public LineTokenizer(byte[] theData, Charset charset) {
        this(theData);
        this.charset = charset;
    }

    /**
     * Set the character set to use when outputting tokens as strings default is 8859_1
     * 
     * @param charset the java charset value
     */
    public void setCharset(@Nullable String charset) {
        this.charset = (charset == null ? null : Charset.forName(charset));
    }

    /**
     * Set the character set to use when outputting tokens as strings default is 8859_1
     * 
     * @param charset the java charset value
     */
    public void setCharset(Charset charset) {
        this.charset = charset;
    }

    /**
     * Indicate if there are more lines
     * 
     * @return true if there are more lines
     */
    public boolean hasMoreTokens() {
        return (tokenCount > 0);
    }

    /**
     * Current count of tokens remaining
     * 
     * @return count of tokens remaining
     */
    public int countTokens() {
        return tokenCount;
    }

    /**
     * Current byte offset in the data Caller can use this on their copy of the original data buffer to extract data of
     * interest
     * 
     * @return current byte offset
     */
    public int getCurrentPosition() {
        return index - 1;
    }

    /**
     * Next token as a string The string is created using the charset specified in the constructor or in the
     * setCharset(String s) method
     * 
     * @return the next line as a string
     */
    public String nextToken() {

        byte[] btok = nextTokenBytes();
        String tok = null;

        if (btok != null) {

            // Use the specified charset to create the string
            if (charset != null) {
                tok = new String(btok, charset);
            } else {
                tok = new String(btok);
            }
        }
        return tok;
    }

    /**
     * Next token as an array of bytes
     * 
     * @return the next line as an array of bytes
     */
    @Nullable
    public byte[] nextTokenBytes() {

        if (tokenCount == 0) {
            return null;
        }

        int end = index;

        for (; end < data.length && data[end] != delim; end++) {
        }

        byte[] tok = new byte[end - index];
        System.arraycopy(data, index, tok, 0, end - index);

        tokenCount--;
        previousIndex = index;
        if (tokenCount > 0 && data[end] == delim) {
            index = end + 1;
        } else {
            index = end;
        }

        return tok;
    }

    /**
     * Push back a single token onto the stack We only take a single push back. This just moves our pointers to the previous
     * index
     */
    public void pushBack() {
        if (previousIndex == -1) {
            return; // already at beginning
        }
        tokenCount++;
        index = previousIndex;
        previousIndex = -1;
    }
}