// DataIdentifier.java
package emissary.parser;
import emissary.config.ConfigUtil;
import emissary.config.Configurator;
import emissary.util.shell.Executrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
/**
 * A simple base class for doing data type identification. This simple implementation can only match constant strings
 * against the leading bytes of the data. The things to match are read from a config file: every {@code TYPE_}-prefixed
 * entry maps a type name to the constant string that data of that type must start with.
 */
public class DataIdentifier {
    // Logger
    private static final Logger logger = LoggerFactory.getLogger(DataIdentifier.class);

    // Default value returned when nothing matches
    public static final String UNKNOWN_TYPE = "simple";

    // Size of string to test (non-final, so subclasses can adjust it)
    @SuppressWarnings("ConstantField")
    protected int DATA_ID_STR_SZ = 100;

    // Things we know how to identify: type name -> constant string the data must start with
    protected Map<String, String> typesMap = new HashMap<>();

    /**
     * Create the id engine using the configuration looked up for this class
     */
    public DataIdentifier() {
        configure(null);
    }

    /**
     * Create the id engine with the specified config info
     *
     * @param config the configuration to read the type patterns from, or null to look up the one for this class
     */
    public DataIdentifier(Configurator config) {
        configure(config);
    }

    /**
     * Load the type patterns from the configuration. When the configuration cannot be loaded the current (initially
     * empty) map is kept, so {@link #identify(byte[])} simply falls back to {@link #UNKNOWN_TYPE}.
     *
     * @param config the configuration to use, or null to look up the one for this class
     */
    protected void configure(@Nullable Configurator config) {
        try {
            if (config == null) {
                config = ConfigUtil.getConfigInfo(this.getClass());
            }
            typesMap = config.findStringMatchMap("TYPE_", Configurator.PRESERVE_CASE);
            logger.debug("Configured with {} identifiction types", typesMap.size());
        } catch (IOException iox) {
            // Best effort: a missing configuration is not fatal, but keep the cause visible at debug level
            logger.debug("No configuration info found", iox);
        }
    }

    /**
     * Return a slice as string for testing
     *
     * @param data the bytes to slice
     * @param limit max length to use for testing
     * @return the first {@code limit} bytes of data as a string, or all of data when it is shorter than that
     */
    protected String getTestString(byte[] data, int limit) {
        // NOTE(review): decodes with the platform default charset - confirm that is intended
        return new String(data, 0, Math.min(data.length, limit));
    }

    /**
     * Return a slice as string for testing, limited to the configured test size
     *
     * @param data the bytes to slice
     * @return the slice
     */
    protected String getTestString(byte[] data) {
        return getTestString(data, DATA_ID_STR_SZ);
    }

    /**
     * Identify the data in the array by comparing its leading bytes against each configured pattern. The first matching
     * entry in the map's iteration order wins; note that a zero-length pattern matches any data.
     *
     * @param data array of data to identify, may be null
     * @return the name of the matching type, or {@link #UNKNOWN_TYPE} when data is null or nothing matches
     */
    public String identify(byte[] data) {
        if (data != null) {
            for (Map.Entry<String, String> entry : typesMap.entrySet()) {
                // NOTE(review): encodes with the platform default charset - confirm that is intended
                byte[] pattern = entry.getValue().getBytes();
                if (startsWith(data, pattern)) {
                    logger.debug("Data identified as {}", entry.getKey());
                    return entry.getKey();
                }
            }
        }
        logger.debug("No identification possible, returning UNKNOWN_TYPE");
        return UNKNOWN_TYPE;
    }

    /**
     * Test whether data begins with exactly the bytes of pattern
     */
    private static boolean startsWith(byte[] data, byte[] pattern) {
        if (data.length < pattern.length) {
            return false;
        }
        for (int i = 0; i < pattern.length; i++) {
            if (data[i] != pattern[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Get the size of data that is required for an id. This is the size used by {@link #getTestString(byte[])}; more
     * or less can be sent.
     *
     * @return the test string size
     * @see #getTestString(byte[])
     */
    public int getTestStringMaxSize() {
        return DATA_ID_STR_SZ;
    }

    /**
     * Identify each file named on the command line and print the result
     *
     * @param args names of the files to identify
     */
    @SuppressWarnings("SystemOut")
    public static void main(String[] args) throws Exception {
        DataIdentifier dataIdentifier = new DataIdentifier();
        for (String filename : args) {
            byte[] data;
            // Close the file as soon as the leading bytes are read so handles do not pile up across arguments
            try (RandomAccessFile raf = new RandomAccessFile(filename, "r")) {
                data = Executrix.readDataFromFile(raf, 0, dataIdentifier.getTestStringMaxSize());
            }
            String result = dataIdentifier.identify(data);
            System.out.println(filename + " : " + result);
        }
    }
}