KffFile.java
package emissary.kff;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.EOFException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.concurrent.locks.ReentrantLock;
import javax.annotation.Nonnull;
/**
* <p>
* KffFile provides access to the known file filter data. The NIST/NSRL data is a CSV file with other information. It
* must be preprocessed in order for this class to access it. The input file for this class must consist of a sorted
* list of known values, where a known value is the CRC32 appended to the SHA-1. This file must be a binary file, so
* each record will be 24 bytes long (20-byte SHA + 4-byte CRC). The CRC should be big endian.
* </p>
* <p>
* Implementation notes: The binary input file is too big to read into memory, so we implement a binary search on the
* file itself. This is why the records must be sorted, and it will improve performance if only unique records are
* generated as well. This class assumes JDK1.4+ and memory maps the file. For earlier versions of the JDK, we can seek
* through the RandomAccessFile instead but performance isn't as good. The file cannot be larger than 2^31 (2 GB)
* because that is the maximum length of the mapped ByteBuffer.
* </p>
*/
public class KffFile implements KffFilter {
private static final Logger logger = LoggerFactory.getLogger(KffFile.class);
/** File containing SHA-1/CRC32 results of known files */
protected RandomAccessFile knownFile;
/** Byte buffer that is mapped to the above file */
protected ByteBuffer mappedBuf;
/** Initial value of high index for binary search */
private final long bSearchInitHigh;
public static final int DEFAULT_RECORD_LENGTH = 24;
protected final int recordLength;
/** String logical name for this filter */
protected String filterName = "UNKNOWN";
protected FilterType ftype = FilterType.UNKNOWN;
protected String myPreferredAlgorithm = "SHA-1";
protected ReentrantLock reentrantLock = new ReentrantLock();
/**
* Creates a new instance of KffFile
*
* @param filename Name of binary file containing sorted RECORD_LENGTH records that are the hash codes possibly
* concatenated with the CRC-32
* @param filterName the name of this filter
* @param ftype type of this filter
* @throws IOException if thrown by file I/O
*/
public KffFile(String filename, String filterName, FilterType ftype) throws IOException {
this(filename, filterName, ftype, DEFAULT_RECORD_LENGTH);
}
/**
* Creates a new instance of KffFile
*
* @param filename Name of binary file containing sorted RECORD_LENGTH records that are the hash codes possibly
* concatenated with the CRC-32
* @param filterName the name of this filter
* @param ftype type of this filter
* @param recordLength fixed record length in file
* @throws IOException if thrown by file I/O
*/
public KffFile(String filename, String filterName, FilterType ftype, int recordLength) throws IOException {
this.ftype = ftype;
this.filterName = filterName;
this.recordLength = recordLength;
// Open file in read-only mode
knownFile = new RandomAccessFile(filename, "r");
// Initial high value for binary search is largest index
bSearchInitHigh = (knownFile.length() / (long) recordLength) - 1;
logger.debug("KFF File {} has {} records", filename, (bSearchInitHigh + 1));
}
/**
* Return the filter name
*/
@Override
public String getName() {
return filterName;
}
/**
* Return the filter type
*/
@Override
public FilterType getFilterType() {
return ftype;
}
/**
* Set the filter type
*
* @param f the new type
*/
public void setFilterType(FilterType f) {
ftype = f;
}
/**
* Set the preferred algorithm to match what is in the mmaped file
*
* @param alg the new algorithm to use
*/
public void setPreferredAlgorithm(String alg) {
myPreferredAlgorithm = alg;
}
/**
* Return the algorigthm being used
*/
public String getPreferredAlgorithm() {
return myPreferredAlgorithm;
}
/**
* Performs a binary search on the file to see if a given HASH/CRC is in the list.
*
* @param hash Result of HASH calculation
* @param crc Result of CRC calculation
* @return true if the record is in the list, false if it isn't
*/
private boolean binaryFileSearch(@Nonnull byte[] hash, long crc) {
// Initialize indexes for binary search
long low = 0;
long high = bSearchInitHigh;
/* Buffer to hold a record */
byte[] rec = new byte[recordLength];
reentrantLock.lock();
try {
// Search until the indexes cross
while (low <= high) {
// Calculate the midpoint
long mid = (low + high) >> 1;
// Multiply the index by the record length to get the buffer position and read the record
knownFile.seek(recordLength * mid);
knownFile.readFully(rec);
// Compare the record with the target. Adjust the indexes accordingly.
int c = compare(rec, hash, crc);
if (c < 0) {
high = mid - 1;
} else if (c > 0) {
low = mid + 1;
} else {
return true;
}
}
} catch (EOFException e) {
// this shouldn't happen if we're synchronizing calls correctly
logger.warn("EOFException reading KffFile: {}", e.getLocalizedMessage());
} catch (IOException e) {
logger.warn("Exception reading KffFile", e);
} finally {
if (reentrantLock.isHeldByCurrentThread()) {
reentrantLock.unlock();
}
}
// not found
return false;
}
/**
* Compares the given hash/crc to the one in the record.
*
* @param record bytes from the kff binary file, one record long
* @param hash HASH to compare to record
* @param crc CRC to compare to record
* @return <0 if given value is less than record, >0 if given value is greater than record, 0 if they match
*/
private static int compare(@Nonnull byte[] record, @Nonnull byte[] hash, long crc) {
int i;
// Compare the hashes first. We can't compare the bytes directly because a Java byte is signed and may generate the
// wrong result. We must convert to integers and then mask off the sign bits to get proper results.
for (i = 0; i < hash.length; i++) {
int ihash = hash[i] & 0xff;
int irec = record[i] & 0xff;
if (ihash < irec) {
return -1;
} else if (ihash > irec) {
return 1;
}
}
// If the hashes match, check the CRCs.
if (crc != -1L) {
for (int j = 24; i < record.length; i++, j -= 8) {
int icrc = ((int) crc >> j) & 0xff;
int irec = record[i] & 0xff;
if (icrc < irec) {
return -1;
} else if (icrc > irec) {
return 1;
}
}
}
return 0;
}
@Override
public boolean check(String fname, ChecksumResults csum) throws Exception {
byte[] hash = csum.getHash(myPreferredAlgorithm);
if (hash == null) {
logger.warn("Filter cannot be used, {} not computed on {}", myPreferredAlgorithm, fname);
return false;
}
return binaryFileSearch(hash, csum.getCrc());
}
}