View Javadoc
1   package emissary.util;
2   
import jakarta.annotation.Nullable;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
6   
7   /**
8    * Used the default way, this is equivalent to StringTokenizer st = new StringTokenizer(new String(theData),"\n",false);
9    * except that a token is returned for blank lines as well. There seems no way to tell the StringTokenizer to do that.
10   */
11  public class LineTokenizer {
12  
13      protected int previousIndex = -1;
14      protected int index = 0;
15      protected byte delim = (byte) '\n';
16      protected int tokenCount = 0;
17      protected byte[] data;
18      @Nullable
19      protected Charset charset = Charset.forName("8859_1");
20  
21      /**
22       * Create a line tokenizer to operate on some data
23       * 
24       * @param theData byte array of data
25       */
26      public LineTokenizer(byte[] theData) {
27          this(theData, (byte) ('\n' & 0xff));
28      }
29  
30      /**
31       * Create a line tokenizer to operate on some data
32       * 
33       * @param theData byte array of data
34       * @param delim the delimiter to mark off lines
35       */
36      public LineTokenizer(byte[] theData, byte delim) {
37          this.delim = delim;
38          data = new byte[theData.length];
39  
40          // Count delimiter tokens in data
41          if (data.length > 0) {
42              for (int i = 0; i < theData.length; i++) {
43                  data[i] = theData[i];
44                  if (data[i] == delim) {
45                      tokenCount++;
46                  }
47              }
48  
49              // Trailing portion with no trailing delim
50              if (data[data.length - 1] != delim) {
51                  tokenCount++;
52              }
53          }
54      }
55  
56      /**
57       * Create a line tokenizer to operate on some data
58       * 
59       * @param theData byte array of data
60       * @param delim the delimiter to mark off lines
61       * @param charset the character set to use the outputting tokens as strings
62       */
63      public LineTokenizer(byte[] theData, byte delim, @Nullable String charset) {
64          this(theData, delim);
65          this.charset = (charset == null ? null : Charset.forName(charset));
66      }
67  
68      /**
69       * Create a line tokenizer to operate on some data
70       * 
71       * @param theData byte array of data
72       * @param delim the delimiter to mark off lines
73       * @param charset the character set to use the outputting tokens as strings
74       */
75      public LineTokenizer(byte[] theData, byte delim, Charset charset) {
76          this(theData, delim);
77          this.charset = charset;
78      }
79  
80      /**
81       * Create a line tokenizer to operate on some data
82       * 
83       * @param theData byte array of data
84       * @param charset the character set to use the outputting tokens as strings
85       */
86      public LineTokenizer(byte[] theData, @Nullable String charset) {
87          this(theData);
88          this.charset = (charset == null ? null : Charset.forName(charset));
89      }
90  
91      /**
92       * Create a line tokenizer to operate on some data
93       * 
94       * @param theData byte array of data
95       * @param charset the character set to use the outputting tokens as strings
96       */
97      public LineTokenizer(byte[] theData, Charset charset) {
98          this(theData);
99          this.charset = charset;
100     }
101 
102     /**
103      * Set the character set to use when outputting tokens as strings default is 8859_1
104      * 
105      * @param charset the java charset value
106      */
107     public void setCharset(@Nullable String charset) {
108         this.charset = (charset == null ? null : Charset.forName(charset));
109     }
110 
111     /**
112      * Set the character set to use when outputting tokens as strings default is 8859_1
113      * 
114      * @param charset the java charset value
115      */
116     public void setCharset(Charset charset) {
117         this.charset = charset;
118     }
119 
120     /**
121      * Indicate if there are more lines
122      * 
123      * @return true if there are more lines
124      */
125     public boolean hasMoreTokens() {
126         return tokenCount > 0;
127     }
128 
129     /**
130      * Current count of tokens remaining
131      * 
132      * @return count of tokens remaining
133      */
134     public int countTokens() {
135         return tokenCount;
136     }
137 
138     /**
139      * Current byte offset in the data Caller can use this on their copy of the original data buffer to extract data of
140      * interest
141      * 
142      * @return current byte offset
143      */
144     public int getCurrentPosition() {
145         return index - 1;
146     }
147 
148     /**
149      * Next token as a string The string is created using the charset specified in the constructor or in the
150      * setCharset(String s) method
151      * 
152      * @return the next line as a string
153      */
154     public String nextToken() {
155 
156         byte[] btok = nextTokenBytes();
157         String tok = null;
158 
159         if (btok != null) {
160 
161             // Use the specified charset to create the string
162             if (charset != null) {
163                 tok = new String(btok, charset);
164             } else {
165                 tok = new String(btok);
166             }
167         }
168         return tok;
169     }
170 
171     /**
172      * Next token as an array of bytes
173      * 
174      * @return the next line as an array of bytes
175      */
176     @Nullable
177     public byte[] nextTokenBytes() {
178 
179         if (tokenCount == 0) {
180             return null;
181         }
182 
183         int end = index;
184 
185         for (; end < data.length && data[end] != delim; end++) {
186         }
187 
188         byte[] tok = new byte[end - index];
189         System.arraycopy(data, index, tok, 0, end - index);
190 
191         tokenCount--;
192         previousIndex = index;
193         if (tokenCount > 0 && data[end] == delim) {
194             index = end + 1;
195         } else {
196             index = end;
197         }
198 
199         return tok;
200     }
201 
202     /**
203      * Push back a single token onto the stack We only take a single push back. This just moves our pointers to the previous
204      * index
205      */
206     public void pushBack() {
207         if (previousIndex == -1) {
208             return; // already at beginning
209         }
210         tokenCount++;
211         index = previousIndex;
212         previousIndex = -1;
213     }
214 }