1 package emissary.util;
2
3 import jakarta.annotation.Nullable;
4
5 import java.nio.charset.Charset;
6
7 /**
8 * Used the default way, this is equivalent to StringTokenizer st = new StringTokenizer(new String(theData),"\n",false);
9 * except that a token is returned for blank lines as well. There seems no way to tell the StringTokenizer to do that.
10 */
11 public class LineTokenizer {
12
13 protected int previousIndex = -1;
14 protected int index = 0;
15 protected byte delim = (byte) '\n';
16 protected int tokenCount = 0;
17 protected byte[] data;
18 @Nullable
19 protected Charset charset = Charset.forName("8859_1");
20
21 /**
22 * Create a line tokenizer to operate on some data
23 *
24 * @param theData byte array of data
25 */
26 public LineTokenizer(byte[] theData) {
27 this(theData, (byte) ('\n' & 0xff));
28 }
29
30 /**
31 * Create a line tokenizer to operate on some data
32 *
33 * @param theData byte array of data
34 * @param delim the delimiter to mark off lines
35 */
36 public LineTokenizer(byte[] theData, byte delim) {
37 this.delim = delim;
38 data = new byte[theData.length];
39
40 // Count delimiter tokens in data
41 if (data.length > 0) {
42 for (int i = 0; i < theData.length; i++) {
43 data[i] = theData[i];
44 if (data[i] == delim) {
45 tokenCount++;
46 }
47 }
48
49 // Trailing portion with no trailing delim
50 if (data[data.length - 1] != delim) {
51 tokenCount++;
52 }
53 }
54 }
55
56 /**
57 * Create a line tokenizer to operate on some data
58 *
59 * @param theData byte array of data
60 * @param delim the delimiter to mark off lines
61 * @param charset the character set to use the outputting tokens as strings
62 */
63 public LineTokenizer(byte[] theData, byte delim, @Nullable String charset) {
64 this(theData, delim);
65 this.charset = (charset == null ? null : Charset.forName(charset));
66 }
67
68 /**
69 * Create a line tokenizer to operate on some data
70 *
71 * @param theData byte array of data
72 * @param delim the delimiter to mark off lines
73 * @param charset the character set to use the outputting tokens as strings
74 */
75 public LineTokenizer(byte[] theData, byte delim, Charset charset) {
76 this(theData, delim);
77 this.charset = charset;
78 }
79
80 /**
81 * Create a line tokenizer to operate on some data
82 *
83 * @param theData byte array of data
84 * @param charset the character set to use the outputting tokens as strings
85 */
86 public LineTokenizer(byte[] theData, @Nullable String charset) {
87 this(theData);
88 this.charset = (charset == null ? null : Charset.forName(charset));
89 }
90
91 /**
92 * Create a line tokenizer to operate on some data
93 *
94 * @param theData byte array of data
95 * @param charset the character set to use the outputting tokens as strings
96 */
97 public LineTokenizer(byte[] theData, Charset charset) {
98 this(theData);
99 this.charset = charset;
100 }
101
102 /**
103 * Set the character set to use when outputting tokens as strings default is 8859_1
104 *
105 * @param charset the java charset value
106 */
107 public void setCharset(@Nullable String charset) {
108 this.charset = (charset == null ? null : Charset.forName(charset));
109 }
110
111 /**
112 * Set the character set to use when outputting tokens as strings default is 8859_1
113 *
114 * @param charset the java charset value
115 */
116 public void setCharset(Charset charset) {
117 this.charset = charset;
118 }
119
120 /**
121 * Indicate if there are more lines
122 *
123 * @return true if there are more lines
124 */
125 public boolean hasMoreTokens() {
126 return tokenCount > 0;
127 }
128
129 /**
130 * Current count of tokens remaining
131 *
132 * @return count of tokens remaining
133 */
134 public int countTokens() {
135 return tokenCount;
136 }
137
138 /**
139 * Current byte offset in the data Caller can use this on their copy of the original data buffer to extract data of
140 * interest
141 *
142 * @return current byte offset
143 */
144 public int getCurrentPosition() {
145 return index - 1;
146 }
147
148 /**
149 * Next token as a string The string is created using the charset specified in the constructor or in the
150 * setCharset(String s) method
151 *
152 * @return the next line as a string
153 */
154 public String nextToken() {
155
156 byte[] btok = nextTokenBytes();
157 String tok = null;
158
159 if (btok != null) {
160
161 // Use the specified charset to create the string
162 if (charset != null) {
163 tok = new String(btok, charset);
164 } else {
165 tok = new String(btok);
166 }
167 }
168 return tok;
169 }
170
171 /**
172 * Next token as an array of bytes
173 *
174 * @return the next line as an array of bytes
175 */
176 @Nullable
177 public byte[] nextTokenBytes() {
178
179 if (tokenCount == 0) {
180 return null;
181 }
182
183 int end = index;
184
185 for (; end < data.length && data[end] != delim; end++) {
186 }
187
188 byte[] tok = new byte[end - index];
189 System.arraycopy(data, index, tok, 0, end - index);
190
191 tokenCount--;
192 previousIndex = index;
193 if (tokenCount > 0 && data[end] == delim) {
194 index = end + 1;
195 } else {
196 index = end;
197 }
198
199 return tok;
200 }
201
202 /**
203 * Push back a single token onto the stack We only take a single push back. This just moves our pointers to the previous
204 * index
205 */
206 public void pushBack() {
207 if (previousIndex == -1) {
208 return; // already at beginning
209 }
210 tokenCount++;
211 index = previousIndex;
212 previousIndex = -1;
213 }
214 }