// MultiKeywordScanner.java
package emissary.util.search;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
/**
 * Scans a byte array for occurrences of any keyword from a loaded set.
 *
 * <p>
 * Matching itself is delegated to a {@link BackwardsTreeScanner}, which is asked to examine individual positions of the
 * data. This class decides <em>which</em> positions are examined: it keeps a 256-entry skip table (one entry per
 * unsigned byte value, in the style of a Horspool bad-character shift) that tells how far the scan may advance after
 * looking at the byte at the current position. No entry is ever larger than the length of the shortest keyword.
 *
 * <p>
 * The instance keeps mutable scan state in fields (the most recently scanned data and the position at which the last
 * scan stopped) so that the {@code findNext} overloads can resume where the previous call ended.
 */
public class MultiKeywordScanner implements IMultiKeywordScanner {
    private static final Logger logger = LoggerFactory.getLogger(MultiKeywordScanner.class);

    /** Data passed to the most recent three-argument scan; used by {@link #findNext()}. */
    @Nullable
    private byte[] data = null;

    /** Per-byte-value advance distance; every entry is at least 1 once keywords have been loaded. */
    private final int[] skipTable = new int[256];

    /** Default advance distance: the length of the shortest keyword, never less than 1. */
    private int standardSkip = 1;

    /** Delegate that performs the actual matching; stays null until a keyword load succeeds. */
    @Nullable
    private BackwardsTreeScanner treeScanner;

    /** Keywords supplied to the most recent {@link #loadKeywords(String[])} call. */
    @Nullable
    private String[] keywords;

    /** Position at which the most recent scan stopped, or -1 if nothing has been scanned yet. */
    private int lastPosition = -1;

    public MultiKeywordScanner() {}

    /**
     * Loads (or replaces) the keyword set and rebuilds the skip table.
     *
     * <p>
     * A failure while creating or resetting the {@link BackwardsTreeScanner} is logged rather than propagated; the skip
     * table is rebuilt regardless.
     *
     * @param keywordsArg the keywords to search for
     */
    @Override
    public void loadKeywords(final String[] keywordsArg) {
        this.keywords = keywordsArg;
        try {
            if (null == this.treeScanner) {
                this.treeScanner = new BackwardsTreeScanner(keywordsArg);
            } else {
                this.treeScanner.resetKeywords(keywordsArg);
            }
        } catch (Exception e) {
            logger.error("Could not create BackwardsTreeScanner", e);
        }
        buildSkipTable();
    }

    /**
     * Rebuilds {@link #standardSkip} and {@link #skipTable} from the current keywords.
     *
     * <p>
     * Every table entry starts at the length of the shortest keyword and is then lowered, for each byte that occurs in a
     * keyword before its final byte, to the distance from that byte to the end of the keyword. All distances are kept at
     * 1 or more so that a scan always makes forward progress, even when the keyword set is missing, empty, or contains
     * an empty keyword.
     */
    private void buildSkipTable() {
        final String[] words = (this.keywords == null) ? new String[0] : this.keywords;

        // Encode each keyword once. getBytes() uses the platform default charset.
        // NOTE(review): presumably BackwardsTreeScanner encodes keywords the same way - confirm before pinning a charset.
        final byte[][] encoded = new byte[words.length][];
        int shortest = Integer.MAX_VALUE;
        for (int i = 0; i < words.length; i++) {
            // A null entry has no bytes to contribute; treat it like an empty keyword.
            encoded[i] = (words[i] == null) ? new byte[0] : words[i].getBytes();
            shortest = Math.min(shortest, encoded[i].length);
        }

        // With no keywords there is no "shortest" length; fall back to single-byte steps.
        // The floor of 1 also covers empty keywords, which would otherwise produce a zero skip.
        this.standardSkip = (words.length == 0) ? 1 : Math.max(1, shortest);

        for (int i = 0; i < this.skipTable.length; i++) {
            this.skipTable[i] = this.standardSkip;
        }

        for (final byte[] keyword : encoded) {
            final int lastIndex = keyword.length - 1;
            // The final byte of a keyword is deliberately excluded: its distance to the end would be zero.
            for (int j = 0; j < lastIndex; j++) {
                final int byteValue = get256Value(keyword[j]);
                final int skip = lastIndex - j;
                if (skip < this.skipTable[byteValue]) {
                    this.skipTable[byteValue] = skip;
                }
            }
        }
    }

    /**
     * Converts a signed byte to its unsigned value so it can index the skip table.
     *
     * @param b the byte to convert
     * @return the value of {@code b} in the range 0-255
     */
    private static int get256Value(final byte b) {
        return b & 0xff;
    }

    /**
     * Finds every hit in the whole array.
     *
     * @param dataArg the data to scan, may be null
     * @return the hits found; empty if {@code dataArg} is null
     */
    @Override
    public HitList findAll(@Nullable final byte[] dataArg) {
        if (dataArg != null) {
            return this.findAll(dataArg, 0, dataArg.length);
        }
        return new HitList();
    }

    /**
     * Finds every hit from {@code start} to the end of the array.
     *
     * @param dataArg the data to scan, may be null
     * @param start index at which scanning begins
     * @return the hits found; empty if {@code dataArg} is null
     */
    @Override
    public HitList findAll(@Nullable final byte[] dataArg, final int start) {
        if (dataArg != null) {
            return this.findAll(dataArg, start, dataArg.length);
        }
        return new HitList();
    }

    /**
     * Finds every hit between {@code start} and {@code stop}.
     *
     * <p>
     * The data is remembered for a later {@link #findNext()} call. A null array yields an empty result; a {@code stop}
     * beyond the end of the array is treated as the array length and a negative {@code start} as 0.
     *
     * @param dataArg the data to scan
     * @param start index at which scanning begins
     * @param stop index at which scanning ends (exclusive)
     * @return the hits found, possibly empty
     */
    @Override
    public HitList findAll(final byte[] dataArg, final int start, final int stop) {
        return runScan(dataArg, start, stop, false, "Error scanning keywords in the BackwardsTreeScanner.");
    }

    /**
     * Continues scanning the most recently scanned data, starting just after the position where the previous scan
     * stopped.
     *
     * <p>
     * NOTE(review): because the scan resumes at {@code lastPosition + 1}, the first position examined is
     * {@code lastPosition + standardSkip} rather than {@code lastPosition} plus the table entry for the byte at
     * {@code lastPosition}; keywords overlapping the previous hit may therefore not be reported - confirm this is
     * intended.
     *
     * @return the hits found at the next matching position; empty if there is none or no data has been scanned yet
     */
    @Override
    public HitList findNext() {
        if (this.data != null) {
            return this.findNext(this.data, this.lastPosition + 1, this.data.length);
        }
        return new HitList();
    }

    /**
     * Continues scanning in the given data, starting just after the position where the previous scan stopped.
     *
     * @param dataArg the data to scan, may be null
     * @return the hits found at the next matching position; empty if there is none or {@code dataArg} is null
     */
    @Override
    public HitList findNext(@Nullable final byte[] dataArg) {
        if (dataArg != null) {
            return this.findNext(dataArg, this.lastPosition + 1, dataArg.length);
        }
        return new HitList();
    }

    /**
     * Scans from {@code start} to the end of the array and stops at the first position that produces a hit.
     *
     * @param dataArg the data to scan, may be null
     * @param start index at which scanning begins
     * @return the hits found at the first matching position; empty if there is none or {@code dataArg} is null
     */
    @Override
    public HitList findNext(@Nullable final byte[] dataArg, final int start) {
        if (dataArg != null) {
            return this.findNext(dataArg, start, dataArg.length);
        }
        return new HitList();
    }

    /**
     * Scans between {@code start} and {@code stop} and stops at the first position that produces a hit.
     *
     * <p>
     * The data and the stopping position are remembered for later {@code findNext} calls. A null array yields an empty
     * result; a {@code stop} beyond the end of the array is treated as the array length and a negative {@code start} as
     * 0.
     *
     * @param dataArg the data to scan
     * @param start index at which scanning begins
     * @param stop index at which scanning ends (exclusive)
     * @return the hits found at the first matching position, possibly empty
     */
    @Override
    public HitList findNext(final byte[] dataArg, final int start, final int stop) {
        return runScan(dataArg, start, stop, true, "Trouble scanning for keywords in BackwardsTreeScanner");
    }

    /**
     * Shared scan loop behind the three-argument {@code findAll} and {@code findNext}.
     *
     * @param dataArg the data to scan; null yields an empty result
     * @param start index at which scanning begins; negative values are treated as 0
     * @param stop exclusive end index; values beyond the array are treated as the array length
     * @param stopAtFirstHit whether to stop as soon as a position has produced at least one hit
     * @param failureMessage message logged if the tree scanner throws
     * @return the hits collected, possibly empty
     */
    private HitList runScan(@Nullable final byte[] dataArg, final int start, final int stop, final boolean stopAtFirstHit,
            final String failureMessage) {
        this.data = dataArg;
        final HitList hits = new HitList();
        if (dataArg == null) {
            return hits;
        }
        if (this.treeScanner == null) {
            // Either loadKeywords was never called or building the scanner failed.
            logger.error("Cannot scan: no BackwardsTreeScanner is available, keywords have not been loaded successfully");
            return hits;
        }

        // Never read past the array, whatever the caller asked for.
        final int limit = Math.min(stop, dataArg.length);
        // The first position examined is standardSkip - 1 bytes past the start of the range.
        int position = Math.max(start, 0) + this.standardSkip - 1;
        while (position < limit) {
            try {
                this.treeScanner.scan(dataArg, position, hits);
            } catch (Exception e) {
                logger.error(failureMessage, e);
                break;
            }
            if (stopAtFirstHit && !hits.isEmpty()) {
                break;
            }
            // Advance by the distance recorded for the byte just examined; never less than 1 after a keyword load.
            position += Math.max(1, this.skipTable[get256Value(dataArg[position])]);
        }
        this.lastPosition = position;
        return hits;
    }
}