// SizeUtil.java
package emissary.util;
import emissary.core.IBaseDataObject;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
/**
 * This class provides routines for approximating the RAM size of Emissary objects - primarily the
 * {@link IBaseDataObject}.
 * <p>
 * Every value returned here is an estimate assembled from payload lengths and fixed per-object constants; nothing in
 * this class measures the live heap.
 */
public class SizeUtil {

    /**
     * A flag to indicate whether we are running a 32 or 64 bit JVM. Note that this is the JVM bits and not the OS bits.
     */
    private static final boolean arch64 = is64BitJvm();

    /**
     * A reference/pointer size in a non-compressed Oops JVM: 8 bytes on a 64-bit JVM, 4 bytes otherwise. Compressed
     * Oops are not detected, so a 64-bit JVM is always counted at 8 bytes per reference.
     */
    private static final long refSize = arch64 ? 8L : 4L;

    /**
     * Object overhead
     */
    private static final long OBJ_OVERHEAD = 8L;

    /**
     * Fixed allowance, in bytes, for JVM overhead and bookkeeping of one String on a 64-bit JVM.
     */
    private static final long STRING_OVERHEAD_64 = 52L;

    /**
     * Fixed allowance, in bytes, for JVM overhead and bookkeeping of one String on a 32-bit JVM.
     */
    private static final long STRING_OVERHEAD_32 = 48L;

    /**
     * Decide whether the running JVM is 64-bit.
     * <p>
     * The {@code sun.arch.data.model} property is consulted first because, where it is provided, it reports the JVM
     * data model directly as {@code "32"} or {@code "64"}. When it is missing or holds any other value, the check
     * falls back to looking for {@code "64"} in {@code os.arch}. A missing {@code os.arch} is treated as 32-bit rather
     * than failing class initialization.
     *
     * @return {@code true} if the JVM is believed to be 64-bit
     */
    private static boolean is64BitJvm() {
        String dataModel = System.getProperty("sun.arch.data.model");
        if ("64".equals(dataModel)) {
            return true;
        }
        if ("32".equals(dataModel)) {
            return false;
        }
        // Property absent or "unknown": fall back to the architecture name
        String osArch = System.getProperty("os.arch");
        return osArch != null && osArch.contains("64");
    }

    /**
     * <i>Approximate</i> the amount of RAM consumed by a String.
     * <p>
     * See
     * <a href="https://www.cs.virginia.edu/kim/publicity/pldi09tutorials/memory-efficient-java-tutorial.pdf">tutorial</a>
     * Slide 26.
     * <p>
     * This calculation is affected by the JVM architecture and whether
     * <a href="https://wiki.openjdk.org/display/HotSpot/CompressedOops">CompressedOops</a> are enabled. It always
     * charges two bytes per {@code char}, so a JVM that stores some strings more compactly will use less than
     * reported.
     *
     * @param str - the String to approximate
     * @return The approximate size, in bytes, in RAM for str; 0 when str is null
     */
    public static long sizeof(@Nullable String str) {
        if (str == null) {
            return 0L;
        }
        // 2 bytes per UTF-16 code unit plus the fixed per-String allowance for this architecture
        return (str.length() * 2L) + (arch64 ? STRING_OVERHEAD_64 : STRING_OVERHEAD_32);
    }

    /**
     * Approximate the amount of RAM consumed by an individual {@link IBaseDataObject}. The purpose of this method is to
     * approximate the RAM that will be consumed by a corresponding serialized/deserialized object, hence not all
     * aspects of the {@link IBaseDataObject} are considered. Additionally, this method does not include all the
     * "outputtable" logic that may be present in output filter, which could make the actual size smaller than is
     * reported by this method.
     * <p>
     * The estimate is the sum of the payloads ({@link #getPayloadMemorySize}), the extracted records
     * ({@link #getExtractedRecordMemorySize}) and the metadata parameters. Only String parameter values are counted.
     *
     * @param ibdo The IBaseDataObject to approximate the size of
     * @return The approximate size, in bytes, in RAM for the IBaseDataObject; 0 when ibdo is null
     */
    public static long sizeof(@Nullable IBaseDataObject ibdo) {
        if (ibdo == null) {
            return 0L;
        }

        // Primary view and other payloads (potentially gigantic), then the extracted records
        long totalSize = getPayloadMemorySize(ibdo) + getExtractedRecordMemorySize(ibdo);

        // This should never be null, but still check
        Map<String, Collection<Object>> parameters = ibdo.getParameters();
        if (parameters == null) {
            return totalSize;
        }

        // Finally, estimate the size of the metadata parameters, assuming the underlying Strings are UTF-16
        for (Map.Entry<String, Collection<Object>> entry : parameters.entrySet()) {
            // Note: The core multimap in Emissary has CharSequence as keys. This is to allow for more
            // flexibility in key design. Under the hood, they are still Strings so the following
            // routines are safe for the time-being, but will need to be changed if the keys change
            String key = entry.getKey();
            totalSize += sizeof(key) + refSize;

            // The values in Emissary are generic Objects although almost all of them are Strings,
            // which is why each value is type-checked before being counted
            Collection<Object> values = entry.getValue();
            if (values == null) {
                // A key without a value collection contributes only the key itself
                continue;
            }
            for (Object value : values) {
                if (value instanceof String) {
                    totalSize += sizeof((String) value) + refSize;
                }
                // TODO: factor in non-String objects
                // but this is not that important since there
                // are not many of them and will change the overall
                // size minimally at this time
            }
        }
        return totalSize;
    }

    /**
     * Estimate the size, in bytes, of the RAM of an entire {@link IBaseDataObject} family tree. This is the sum of the
     * sizes of the individual non-null members of the family tree, plus a fixed object-overhead and reference
     * allowance for each of them.
     *
     * @param familyTree - {@code List<IBaseDataObject>} representing the family tree for a document object
     * @return - the approximate size, in bytes, in RAM for the familyTree; 0 when familyTree is null
     */
    public static long sizeof(@Nullable List<IBaseDataObject> familyTree) {
        if (familyTree == null) {
            return 0L;
        }
        long totalSize = 0L;
        for (IBaseDataObject ibdo : familyTree) {
            if (ibdo != null) {
                totalSize += sizeof(ibdo);
                totalSize += OBJ_OVERHEAD + refSize; // For the pointer to ibdo in familyTree (Object overhead)
            }
        }
        return totalSize;
    }

    /**
     * Approximates the amount of memory consumed by the "extracted records" in a single IBaseDataObject. This is
     * typically the case when the framework gets eventing datasets. These extracted records are usually treated
     * specially and not run through the processing pipelines proper, but on output do appear as proper child
     * IBaseDataObjects. In the case of a large dataset, these extracted records can consume huge amounts of RAM.
     * <p>
     * The records are sized exactly like a family tree, see {@link #sizeof(List)}.
     *
     * @param ibdo - The IBaseDataObject to approximate
     * @return - The approximate memory footprint, in bytes, for extracted records of an IBaseDataObject; 0 when ibdo is
     *         null or has no extracted records
     */
    public static long getExtractedRecordMemorySize(@Nullable IBaseDataObject ibdo) {
        if (ibdo == null || !ibdo.hasExtractedRecords()) {
            return 0L;
        }
        // Typed local keeps overload resolution pinned to sizeof(List); a null list yields 0 there
        List<IBaseDataObject> childObjList = ibdo.getExtractedRecords();
        return sizeof(childObjList);
    }

    /**
     * Approximate the amount of memory consumed by the various "payloads" of an IBaseDataObject. In this case, a
     * payload refers to the header, footer, data (primary view), and all the alternate views.
     *
     * @param ibdo - The IBaseDataObject to approximate
     * @return - The approximate memory footprint, in bytes, for the IBaseDataObject; 0 when ibdo is null
     */
    public static long getPayloadMemorySize(@Nullable IBaseDataObject ibdo) {
        if (ibdo == null) {
            return 0L;
        }

        // We don't concern ourselves with object references here, because they are likely
        // tiny compared to the various "payloads" and altViews. A more accurate
        // version of this method would factor those in, but they are likely to be
        // negligible

        // Primary view (potentially gigantic)
        long totalSize = ibdo.dataLength();

        // Header and footer size (probably not big); each accessor is called once and the result reused
        byte[] footer = ibdo.footer();
        if (footer != null) {
            totalSize += footer.length;
        }
        byte[] header = ibdo.header();
        if (header != null) {
            totalSize += header.length;
        }

        // Size up the alternative views; only the byte arrays matter, the view names are not counted
        Map<String, byte[]> altViews = ibdo.getAlternateViews();
        if (altViews != null) {
            for (byte[] view : altViews.values()) {
                if (view != null) {
                    totalSize += view.length;
                }
            }
        }
        return totalSize;
    }

    /** This class is not meant to be instantiated. */
    private SizeUtil() {}
}