// AbstractFilter.java
package emissary.output.filter;
import emissary.config.ConfigUtil;
import emissary.config.Configurator;
import emissary.core.EmissaryRuntimeException;
import emissary.core.IBaseDataObject;
import emissary.output.DropOffUtil;
import emissary.util.JavaCharSet;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* Provides the base mechanism for a drop off filter
*/
public abstract class AbstractFilter implements IDropOffFilter {
/** A static convenience logger */
protected static final Logger slogger = LoggerFactory.getLogger(AbstractFilter.class);
/** Per-instance logger named after the runtime class of the implementation */
protected Logger logger = LoggerFactory.getLogger(this.getClass().getName());
/** hold onto the parent configurator */
protected Configurator configG;
/** hold onto the specific filter configurator */
protected Configurator filterConfig;
/** hold onto the filter name, impl should set this */
protected String filterName = "Abstract";
/** hold the output specification, if any, for this filter (read from the {@code OUTPUT_SPEC_} entry for the filter name) */
protected String outputSpec;
/** hold the error specification, if any, for this filter (read from the {@code ERROR_SPEC_} entry for the filter name) */
protected String errorSpec;
/** hold the filter condition specification, if any, for this filter (read from the {@code FILTER_CONDITION_} entry) */
protected String filterConditionSpec;
/** hold the filter condition, if any, for this filter; null means no condition is applied */
private IFilterCondition filterCondition;
/**
* A set of FileType and FileType.ViewName strings controlling what can be output by this filter. A {@code "*"} entry
* makes every type outputtable.
*/
protected Set<String> outputTypes = Collections.emptySet();
/** String to use when dealing with the primary view specifically */
public static final String PRIMARY_VIEW_NAME = "PrimaryView";
/** Primary view name with the leading dot separator */
public static final String PRIMARY_VIEW = "." + PRIMARY_VIEW_NAME;
/** Primary view wildcard string */
public static final String ALL_PRIMARY_VIEWS = "*" + PRIMARY_VIEW;
/** String to use when dealing with a language in a view */
public static final String LANGUAGE_VIEW_NAME = "Language";
/** Language view name with the leading dot separator */
public static final String LANGUAGE_VIEW = "." + LANGUAGE_VIEW_NAME;
/** Language wildcard string */
public static final String ALL_LANGUAGE_VIEWS = "*" + LANGUAGE_VIEW;
/** Alternate view wildcard string */
public static final String ALL_ALT_VIEWS = "*.AlternateView";
/** Metadata view name */
public static final String METADATA_VIEW_NAME = "Metadata";
/** Metadata view name with the leading dot separator */
public static final String METADATA_VIEW = "." + METADATA_VIEW_NAME;
/** Exact-match denylist entries: view names (optionally qualified by file type) that must NOT be output */
protected Set<String> denylist = new HashSet<>();
/** Denylist entries that ended in {@code *}; stored without the trailing {@code *} and matched as prefixes */
protected Set<String> wildCardDenylist = new HashSet<>();
/** Helper used to look up the language of a payload; stays null until one of the initialize methods has run */
@Nullable
protected DropOffUtil dropOffUtil = null;
/** Character-class body substituted for every {@code %s} in the two denylist format templates below */
protected String denylistAllowedNameChars = "a-zA-Z0-9_\\-";
/** Regex template for the file type part of a denylist entry; {@code %s} is replaced by the allowed name characters */
protected String denylistFiletypeFormat = "^[%s]+$";
/** Compiled form of {@link #denylistFiletypeFormat}; null until the name validation patterns are loaded */
protected Pattern denylistFiletypeFormatPattern;
/** Regex template for the view name part of a denylist entry; permits one optional dotted segment and a trailing {@code *} */
protected String denylistViewNameFormat = "^[%s]+(\\.[%s]+)?\\*?$";
/** Compiled form of {@link #denylistViewNameFormat}; null until the name validation patterns are loaded */
protected Pattern denylistViewNameFormatPattern;
/**
 * Initialization phase hook for the filter that resolves the filter specific configuration itself, using the default
 * lookup preferences (named configuration first, class default last).
 *
 * @param theConfigG passed in configuration object, usually DropOff's config
 * @param filterName the configured name of this filter or null to keep the current name
 */
@Override
public void initialize(final Configurator theConfigG, final String filterName) {
    // Apply the supplied name BEFORE looking up the configuration: the name-specific
    // configuration file names are built from getFilterName(), so resolving first would
    // search under the previous (default) name and could never find the named config.
    if (filterName != null) {
        setFilterName(filterName);
    }
    loadFilterConfiguration(null);
    initialize(theConfigG, filterName, this.filterConfig);
}
/**
* Initialization phase hook for the filter with provided filter configuration. Stores the parent configuration, applies
* the filter name, resolves the filter configuration, then loads the filter condition, the output/error specs and the
* output type and denylist settings.
*
* @param theConfigG passed in configuration object, usually DropOff's config
* @param filterName the configured name of this filter or null for the default
* @param theFilterConfig the configuration for the specific filter; when null the configuration is looked up by name
*/
@Override
public void initialize(final Configurator theConfigG, @Nullable final String filterName,
final Configurator theFilterConfig) {
this.configG = theConfigG;
// the name must be set first: the loaders below build their config keys and file names from getFilterName()
if (filterName != null) {
setFilterName(filterName);
}
loadFilterConfiguration(theFilterConfig);
loadFilterCondition(theConfigG);
loadOutputSpec(theConfigG);
this.dropOffUtil = new DropOffUtil(theConfigG);
// use the field rather than the parameter so a configuration resolved by name is honoured
initializeOutputTypes(this.filterConfig);
}
/**
 * Load and initialize the optional filter condition configured for this filter.
 * <p>
 * The entry {@code FILTER_CONDITION_<filtername>} has the form {@code profilename:clazz} (just like the dropoff filter
 * config) or simply {@code clazz}. When a profile name is given, it is resolved through the parent configuration to a
 * configuration name which is loaded and handed to the condition's {@code initialize} method; otherwise the condition
 * is initialized with a null configuration.
 * <p>
 * Failures are logged and never propagated, so a broken condition does not stop filter initialization.
 *
 * @param parentConfig the parent configuration holding the filter condition entry
 */
private void loadFilterCondition(final Configurator parentConfig) {
    this.filterConditionSpec = parentConfig.findStringEntry("FILTER_CONDITION_" + getFilterName(), null);
    if (StringUtils.isEmpty(this.filterConditionSpec)) {
        // no condition configured for this filter
        return;
    }

    final String clazz;
    // named conditionConfig (not filterConfig) so the filter's own configuration field is not shadowed
    Configurator conditionConfig = null;
    final int colpos = this.filterConditionSpec.indexOf(':');
    if (colpos > -1) {
        final String name = this.filterConditionSpec.substring(0, colpos);
        clazz = this.filterConditionSpec.substring(colpos + 1);
        final String conditionConfigName = parentConfig.findStringEntry(name);
        if (conditionConfigName != null) {
            try {
                conditionConfig = ConfigUtil.getConfigInfo(conditionConfigName);
            } catch (IOException configError) {
                // keep the cause in the log so the reason for the failure is not lost
                logger.warn("Specified filter configuration {} cannot be loaded", conditionConfigName, configError);
                return;
            }
        }
    } else {
        clazz = this.filterConditionSpec;
    }

    try {
        final Object filterConditionObj = emissary.core.Factory.create(clazz);
        // instanceof is already false for null, no separate null check needed
        if (filterConditionObj instanceof IFilterCondition) {
            this.filterCondition = (IFilterCondition) filterConditionObj;
            // initialize using the config
            this.filterCondition.initialize(conditionConfig);
        } else {
            logger.warn("Failed to initialize filter condition {}. Filter does not implement IFilterCondition", getFilterName());
        }
    } catch (Throwable t) {
        // deliberately broad: any failure creating or initializing the condition is logged, not rethrown
        logger.error("Failed in initialize filter condition {} with argument {} and message {}", getFilterName(), filterConditionSpec,
                t.getMessage(), t);
    }
}
/**
 * Load the name validation patterns, the {@code OUTPUT_TYPE} entries and the denylist from the filter specific
 * configuration. Does nothing except log when no configuration is available.
 *
 * @param config the filter specific configurator, may be null
 */
protected void initializeOutputTypes(@Nullable final Configurator config) {
    if (config == null) {
        this.logger.debug("initializeOutputTypes has null filter config");
        return;
    }
    // patterns must be loaded before the denylist, which validates its entries against them
    this.loadNameValidationPatterns(config);
    this.outputTypes = config.findEntriesAsSet("OUTPUT_TYPE");
    this.logger.debug("Loaded {} output types for filter {}: {}", this.outputTypes.size(), getFilterName(), this.outputTypes);
    this.initializeDenylist(config);
}
/**
 * Read the denylist name validation settings from the configuration, keeping the current values as defaults, and
 * compile the file type and view name patterns. Every {@code %s} in a format template is replaced by the allowed name
 * characters before compilation.
 *
 * @param config the filter specific configurator
 */
protected void loadNameValidationPatterns(final Configurator config) {
    this.denylistAllowedNameChars = config.findStringEntry("DENYLIST_ALLOWED_NAME_CHARS", this.denylistAllowedNameChars);

    this.denylistFiletypeFormat = config.findStringEntry("DENYLIST_FILETYPE_FORMAT", this.denylistFiletypeFormat);
    final String filetypeRegex = this.denylistFiletypeFormat.replace("%s", this.denylistAllowedNameChars);
    this.denylistFiletypeFormatPattern = Pattern.compile(filetypeRegex);

    this.denylistViewNameFormat = config.findStringEntry("DENYLIST_VIEW_NAME_FORMAT", this.denylistViewNameFormat);
    final String viewNameRegex = this.denylistViewNameFormat.replace("%s", this.denylistAllowedNameChars);
    this.denylistViewNameFormatPattern = Pattern.compile(viewNameRegex);
}
/**
 * Load the {@code DENYLIST} entries from the configuration. Each entry is validated; entries whose view name ends in
 * {@code *} are stored (without the trailing {@code *}) as prefix matches, all others as exact matches.
 *
 * @param config the filter specific configurator
 * @throws EmissaryRuntimeException if an entry has an invalid file type or view name
 */
protected void initializeDenylist(final Configurator config) {
    for (final String entry : config.findEntriesAsSet("DENYLIST")) {
        final String viewName = validateAndRemoveDenylistFiletype(entry);
        if (!matchesDenylistViewNameFormatPattern(viewName)) {
            throw new EmissaryRuntimeException(String.format(
                    "Invalid filter configuration: `DENYLIST = \"%s\"` " +
                            "entry `%s` must match pattern `%s`.",
                    entry, entry, getDenylistViewNameFormat()));
        }
        // a dotted view name is accepted by the pattern but discouraged
        if (viewName.indexOf('.') >= 0) {
            logger.warn("`DENYLIST = \"{}\"` viewName `{}` should not contain any `.` characters", entry, viewName);
        }
        if (viewName.endsWith("*")) {
            // viewName is a suffix of entry, so the entry ends with '*' as well; drop it
            this.wildCardDenylist.add(entry.substring(0, entry.length() - 1));
        } else {
            this.denylist.add(entry);
        }
    }
    this.logger.debug("Loaded {} ignorelist types for filter {}: {}", this.denylist.size(), getFilterName(), this.denylist);
    this.logger.debug("Loaded {} wildcard suffix ignorelist types for filter {}: {}", this.wildCardDenylist.size(), getFilterName(),
            this.wildCardDenylist);
}
/**
 * Split a denylist entry at its first {@code .} into file type and view name, validate the file type and return the
 * view name. An entry without a {@code .} has no file type and is returned unchanged.
 *
 * @param entry the raw denylist entry
 * @return the view name portion of the entry
 * @throws EmissaryRuntimeException if the file type is the wildcard {@code *} or does not match the file type pattern
 */
protected String validateAndRemoveDenylistFiletype(final String entry) {
    final int firstDot = entry.indexOf('.');
    if (firstDot < 0) {
        return entry;
    }

    final String filetype = entry.substring(0, firstDot);
    final String viewName = entry.substring(firstDot + 1);

    // DENYLIST = "*.<viewName>" not allowed
    if ("*".equals(filetype)) {
        throw new EmissaryRuntimeException(String.format(
                "Invalid filter configuration: `DENYLIST = \"%s\"` " +
                        "wildcarded filetypes not allowed in denylist - Did you mean `DENYLIST = \"%s\"`?",
                entry, viewName));
    }
    if (!matchesDenylistFiletypeFormatPattern(filetype)) {
        throw new EmissaryRuntimeException(String.format(
                "Invalid filter configuration: `DENYLIST = \"%s\"` " +
                        "filetype `%s` must match pattern `%s`",
                entry, filetype, getDenylistFiletypeFormat()));
    }
    return viewName;
}
/**
 * Get the name currently assigned to this filter instance.
 *
 * @return the filter name
 */
@Override
public String getFilterName() {
    return filterName;
}
/**
 * Assign a new name to this filter instance.
 *
 * @param s the name to use from now on
 */
@Override
public void setFilterName(final String s) {
    filterName = s;
}
/**
 * Resolve the filter configuration. A supplied configuration always wins; otherwise the configuration is searched in
 * this order of preference:
 * <ol>
 * <li>[filter-package].FILTER_NAME.cfg</li>
 * <li>[filter-package].[filter-class]-FILTER_NAME.cfg</li>
 * <li>[filter-package].[filter-class].cfg</li>
 * </ol>
 * The first two candidates are only tried when the filter has a name. If nothing can be loaded the current
 * configuration is left untouched and the failure is logged at debug level.
 *
 * @param suppliedFilterConfig configuration to use when not null
 */
protected void loadFilterConfiguration(@Nullable final Configurator suppliedFilterConfig) {
    if (suppliedFilterConfig != null) {
        this.filterConfig = suppliedFilterConfig;
        return;
    }

    final String className = this.getClass().getName();
    final String name = getFilterName();
    final List<String> candidates = new ArrayList<>();
    if (name != null) {
        final String packageName = this.getClass().getPackage().getName();
        candidates.add(packageName + "." + name + ConfigUtil.CONFIG_FILE_ENDING);
        candidates.add(className + "-" + name + ConfigUtil.CONFIG_FILE_ENDING);
    }
    candidates.add(className + ConfigUtil.CONFIG_FILE_ENDING);

    this.logger.debug("Looking for filter configuration preferences {}", candidates);
    try {
        this.filterConfig = ConfigUtil.getConfigInfo(candidates);
    } catch (IOException iox) {
        this.logger.debug("Could not find filter configuration for {}", getFilterName(), iox);
    }
}
/**
 * Apply the filter to each document of the list, in list order.
 *
 * @param list collection of IBaseDataObject to run the filter on
 * @param params map of params
 * @return the status of the last document filtered, or 0 for an empty list
 */
@Override
public int filter(final List<IBaseDataObject> list, final Map<String, Object> params) {
    // Iteration order matters: documents are handled exactly as they are ordered in the list
    int lastStatus = 0;
    for (final IBaseDataObject payload : list) {
        lastStatus = filter(payload, params);
    }
    return lastStatus;
}
/**
 * Apply the stream based filter to each document of the list, in list order.
 *
 * @param list collection of IBaseDataObject to run the filter on
 * @param params map of params
 * @param output the output stream
 * @return the status of the last document filtered, or 0 for an empty list
 */
@Override
public int filter(final List<IBaseDataObject> list, final Map<String, Object> params, final OutputStream output) {
    int lastStatus = 0;
    for (final IBaseDataObject payload : list) {
        lastStatus = filter(payload, params, output);
    }
    return lastStatus;
}
/**
* The method that all filters have to provide for stream based output. This base implementation does not support
* stream output and always throws; implementations that support it must override this method.
*
* @param payload the document to output
* @param params map of params
* @param output the output stream
* @return status value (never returned by this base implementation)
* @throws IllegalArgumentException always, unless overridden
*/
@Override
public int filter(final IBaseDataObject payload, final Map<String, Object> params, final OutputStream output) {
throw new IllegalArgumentException("Not supported, override to support");
}
/**
 * Check the payload against the configured filter condition.
 *
 * @param d the document
 * @return true when no filter condition is configured or the condition accepts the document
 */
@Override
public boolean isOutputtable(final IBaseDataObject d) {
    if (filterCondition == null) {
        return true;
    }
    return filterCondition.accept(d);
}
/**
 * Check the payload list against the configured filter condition.
 *
 * @param list the documents
 * @return true when no filter condition is configured or the condition accepts the list
 */
@Override
public boolean isOutputtable(final List<IBaseDataObject> list) {
    if (filterCondition == null) {
        return true;
    }
    return filterCondition.accept(list);
}
/**
* Determine if the payload is outputtable by the filter. This base implementation accepts every payload without
* looking at the document or the params.
*
* @param d the document
* @param params map of params
* @return true if the filter wants a crack at outputting this payload; always true here
*/
@Override
public boolean isOutputtable(final IBaseDataObject d, final Map<String, Object> params) {
return true;
}
/**
* Determine if the payload list is outputtable by the filter. This base implementation accepts every list without
* looking at its contents or the params.
*
* @param list collection of IBaseDataObject to check for outputtability
* @param params map of params
* @return true if the filter wants a crack at outputting this payload; always true here
*/
@Override
public boolean isOutputtable(final List<IBaseDataObject> list, final Map<String, Object> params) {
return true;
}
/**
 * Determine whether a single type is enabled for output by this filter, i.e. the configured output types contain the
 * wildcard {@code *} or the type itself.
 *
 * @param type of the data
 * @return true if the type is outputtable
 */
protected boolean isOutputtable(final String type) {
    if (this.outputTypes.contains("*")) {
        return true;
    }
    return this.outputTypes.contains(type);
}
/**
 * Determine whether any of the given types is enabled for output by this filter. The configured output types match
 * when they contain the wildcard {@code *} or share at least one element with the given types.
 *
 * @param types types to check
 * @return true if any one of the types is outputtable
 */
protected boolean isOutputtable(final Collection<String> types) {
    if (this.outputTypes.contains("*")) {
        this.logger.debug("Outputtable due to wildcard in output types");
        return true;
    }

    final boolean overlaps = !Collections.disjoint(this.outputTypes, types);
    if (overlaps && this.logger.isDebugEnabled()) {
        // the intersection is only computed for the debug message
        final Set<String> matched = new HashSet<>(this.outputTypes);
        matched.retainAll(types);
        this.logger.debug("Outputtable due to non-disjoint sets: {}", matched);
    }
    return overlaps;
}
/**
* Close the filter. This base implementation has nothing to release.
*/
@Override
public void close() {
// nothing to do
}
/**
 * Read the output and error specs for this filter from the supplied configuration and remember them. The entries are
 * {@code OUTPUT_SPEC_} and {@code ERROR_SPEC_} followed by the filter name; a missing entry yields null.
 *
 * @param theConfigG the configuration holding the spec entries
 */
protected void loadOutputSpec(final Configurator theConfigG) {
    final String name = getFilterName();
    this.outputSpec = theConfigG.findStringEntry("OUTPUT_SPEC_" + name, null);
    this.errorSpec = theConfigG.findStringEntry("ERROR_SPEC_" + name, null);
    this.logger.debug("Output spec for {} is {}", name, this.outputSpec);
}
/**
 * Decode the whole byte array into a String using the specified charset.
 *
 * @param value the contents
 * @param charset the charset of the bytes in value
 * @return the decoded string
 */
protected String normalizeBytes(final byte[] value, final String charset) {
    final int length = value.length;
    return normalizeBytes(value, 0, length, charset);
}
/**
 * Decode part of a byte array into a String using the specified charset. When no charset is given, or the charset is
 * not supported, the bytes are decoded with the platform default charset instead.
 *
 * @param value the contents
 * @param start position to start subarray
 * @param len length of subarray
 * @param charset the charset of the bytes in value, may be null
 * @return the decoded string
 */
protected String normalizeBytes(final byte[] value, final int start, final int len, @Nullable final String charset) {
    if (charset != null) {
        try {
            return new String(value, start, len, charset);
        } catch (UnsupportedEncodingException ex) {
            this.logger.debug("Error encoding string", ex);
        }
    }
    // no charset given or it was unsupported: fall back to the platform default
    return new String(value, start, len);
}
/**
 * Extract the charset from the payload or defaultCharset.
 * <p>
 * The font encoding of the payload is used first. While the encoding is missing or only a generic one (its name
 * contains {@code ASCII} or {@code 8859-1}), the {@code HTML_CHARSET} and then the {@code MIME_CHARSET} parameters are
 * consulted and replace it when present.
 *
 * @param d the payload
 * @param defaultCharset the default
 * @return the Java charset name for the detected encoding, or defaultCharset if none was found
 */
protected String getCharset(final IBaseDataObject d, final String defaultCharset) {
    String lang = d.getFontEncoding();
    if (isMissingOrGenericEncoding(lang)) {
        final String htmlCharset = d.getStringParameter("HTML_CHARSET");
        if (htmlCharset != null) {
            lang = htmlCharset;
        }
    }
    if (isMissingOrGenericEncoding(lang)) {
        final String mimeCharset = d.getStringParameter("MIME_CHARSET");
        if (mimeCharset != null) {
            lang = mimeCharset;
        }
    }
    if (lang == null) {
        return defaultCharset;
    }
    return JavaCharSet.get(lang);
}

/**
 * Tell whether an encoding name is absent or names a generic encoding (ASCII or ISO-8859-1) that a more specific
 * charset parameter should be allowed to override.
 *
 * @param encoding the encoding name, may be null
 * @return true if encoding is null or contains "ASCII" or "8859-1", ignoring case
 */
private static boolean isMissingOrGenericEncoding(@Nullable final String encoding) {
    if (encoding == null) {
        return true;
    }
    // Locale.ROOT keeps the comparison locale-independent; with the default locale a
    // Turkish/Azeri JVM upper-cases 'i' to a dotted capital and "ascii" would never match
    final String upper = encoding.toUpperCase(Locale.ROOT);
    return upper.contains("ASCII") || upper.contains("8859-1");
}
/**
 * Build the full set of type names to check for a payload: the primary view types, the types for every alternate view
 * and for the metadata view, plus the alternate view wildcard. The result can be passed to
 * {@link #isOutputtable(Collection)} for checking.
 *
 * @param d the payload
 * @return the set of type names to check
 */
protected Set<String> getTypesToCheck(final IBaseDataObject d) {
    final Set<String> allTypes = getPrimaryTypesToCheck(d);
    for (final String altViewName : d.getAlternateViewNames()) {
        final Set<String> altViewTypes = getTypesToCheckForNamedView(d, altViewName);
        allTypes.addAll(altViewTypes);
    }
    final Set<String> metadataTypes = getTypesToCheckForNamedView(d, METADATA_VIEW_NAME);
    allTypes.addAll(metadataTypes);
    allTypes.add(ALL_ALT_VIEWS);
    return allTypes;
}
/**
 * Build the set of type names to check for one named view of a payload. The names combine the file type, the current
 * form (when it differs from the file type) and the language (unless it is {@code NONE}) with and without the view
 * name. A denylisted view yields an empty set.
 *
 * @param d the payload
 * @param viewName the name of the view
 * @return the set of type names to check, empty if the view is denylisted
 */
protected Set<String> getTypesToCheckForNamedView(final IBaseDataObject d, final String viewName) {
    final Set<String> candidates = new HashSet<>();
    final String language = this.dropOffUtil.getLanguage(d);
    final String fileType = DropOffUtil.getFileType(d);
    final String currentForm = d.currentForm();

    // skip over denylisted alt views
    if (denyListContains(fileType, viewName)) {
        return candidates;
    }

    final boolean hasLanguage = !"NONE".equals(language);
    final String viewSuffix = "." + viewName;

    Collections.addAll(candidates, fileType, fileType + viewSuffix, "*" + viewSuffix);
    if (hasLanguage) {
        Collections.addAll(candidates,
                language,
                language + viewSuffix,
                language + "." + fileType,
                language + "." + fileType + viewSuffix);
    }

    final boolean formDiffers = currentForm != null && !fileType.equals(currentForm);
    if (formDiffers) {
        Collections.addAll(candidates, currentForm, currentForm + viewSuffix);
        if (hasLanguage) {
            Collections.addAll(candidates,
                    language + "." + currentForm,
                    language + "." + currentForm + viewSuffix);
        }
    }

    this.logger.debug("Types to be checked for named view {}: {}", viewName, candidates);
    return candidates;
}
/**
 * Check whether a view is denylisted, either by its bare view name or qualified as {@code fileType.viewName}. Exact
 * entries are compared for equality, wildcard entries as prefixes.
 *
 * @param fileType the file type of the payload
 * @param viewName the name of the view
 * @return true if the view must not be output
 */
protected boolean denyListContains(final String fileType, final String viewName) {
    final String qualifiedName = fileType + "." + viewName;
    if (this.denylist.contains(viewName) || this.denylist.contains(qualifiedName)) {
        return true;
    }
    for (final String prefix : this.wildCardDenylist) {
        if (viewName.startsWith(prefix) || qualifiedName.startsWith(prefix)) {
            return true;
        }
    }
    return false;
}
/**
 * Build the set of type names to check for the primary view of a payload: the names for the {@code PrimaryView} view
 * plus the language view name and the language and primary view wildcards. The result can be passed to
 * {@link #isOutputtable(Collection)} for checking whether the primary view should be output.
 *
 * @param d the payload
 * @return the set of type names to check
 */
protected Set<String> getPrimaryTypesToCheck(final IBaseDataObject d) {
    final Set<String> primaryTypes = getTypesToCheckForNamedView(d, PRIMARY_VIEW_NAME);
    final String languageView = this.dropOffUtil.getLanguage(d) + LANGUAGE_VIEW;
    Collections.addAll(primaryTypes, languageView, ALL_LANGUAGE_VIEWS, ALL_PRIMARY_VIEWS);
    return primaryTypes;
}
/**
 * Get the output specification loaded for this filter.
 *
 * @return the output spec, or null if none was configured
 */
@Override
public String getOutputSpec() {
    return outputSpec;
}
/**
 * Get the error specification loaded for this filter.
 *
 * @return the error spec, or null if none was configured
 */
@Override
public String getErrorSpec() {
    return errorSpec;
}
/**
 * Get the output types configured for this filter.
 *
 * @return a fresh copy of the output types; changes to it do not affect the filter
 */
@Override
public Collection<String> getOutputTypes() {
    final Set<String> copy = new HashSet<>(outputTypes);
    return copy;
}
/**
 * Test a string against the compiled denylist file type pattern.
 *
 * @param str the candidate file type
 * @return true if the whole string matches the pattern
 */
public boolean matchesDenylistFiletypeFormatPattern(String str) {
    final boolean matched = this.denylistFiletypeFormatPattern.matcher(str).matches();
    return matched;
}
/**
 * Get the regular expression of the compiled denylist file type pattern.
 *
 * @return the pattern source, with the allowed name characters already substituted
 */
public String getDenylistFiletypeFormat() {
    final String regex = this.denylistFiletypeFormatPattern.pattern();
    return regex;
}
/**
 * Test a string against the compiled denylist view name pattern.
 *
 * @param str the candidate view name
 * @return true if the whole string matches the pattern
 */
public boolean matchesDenylistViewNameFormatPattern(String str) {
    final boolean matched = this.denylistViewNameFormatPattern.matcher(str).matches();
    return matched;
}
/**
 * Get the regular expression of the compiled denylist view name pattern.
 *
 * @return the pattern source, with the allowed name characters already substituted
 */
public String getDenylistViewNameFormat() {
    final String regex = this.denylistViewNameFormatPattern.pattern();
    return regex;
}
}