View Javadoc
1   package emissary.output.filter;
2   
3   import emissary.config.ConfigUtil;
4   import emissary.config.Configurator;
5   import emissary.core.EmissaryRuntimeException;
6   import emissary.core.IBaseDataObject;
7   import emissary.output.DropOffUtil;
8   import emissary.util.JavaCharSet;
9   
10  import org.apache.commons.lang3.StringUtils;
11  import org.slf4j.Logger;
12  import org.slf4j.LoggerFactory;
13  
14  import java.io.IOException;
15  import java.io.OutputStream;
16  import java.io.UnsupportedEncodingException;
17  import java.util.ArrayList;
18  import java.util.Collection;
19  import java.util.Collections;
20  import java.util.HashSet;
21  import java.util.List;
22  import java.util.Locale;
23  import java.util.Map;
24  import java.util.Set;
25  import java.util.regex.Pattern;
26  import javax.annotation.Nullable;
27  
28  /**
29   * Provides the base mechanism for a drop off filter
30   */
31  public abstract class AbstractFilter implements IDropOffFilter {
    /** A static convenience logger */
    protected static final Logger slogger = LoggerFactory.getLogger(AbstractFilter.class);

    /** Instance logger named after the concrete implementation class */
    protected Logger logger = LoggerFactory.getLogger(this.getClass().getName());

    /** hold onto the parent configurator */
    protected Configurator configG;

    /** hold onto the specific filter configurator */
    protected Configurator filterConfig;

    /** hold onto the filter name, impl should set this */
    protected String filterName = "Abstract";

    /** hold the output specification, if any, for this filter (read from {@code OUTPUT_SPEC_<filterName>}) */
    protected String outputSpec;

    /** hold the error specification, if any, for this filter (read from {@code ERROR_SPEC_<filterName>}) */
    protected String errorSpec;

    /** hold the filter condition specification, if any, for this filter (read from {@code FILTER_CONDITION_<filterName>}) */
    protected String filterConditionSpec;

    /** hold the filter condition, if any, for this filter; null means no condition is applied */
    private IFilterCondition filterCondition;

    /**
     * A set of FileType and FileType.ViewName strings controlling what can be output by this filter
     */
    protected Set<String> outputTypes = Collections.emptySet();

    /** String to use when dealing with the primary view specifically */
    public static final String PRIMARY_VIEW_NAME = "PrimaryView";
    /** {@link #PRIMARY_VIEW_NAME} prefixed with the "." separator */
    public static final String PRIMARY_VIEW = "." + PRIMARY_VIEW_NAME;

    /** Primary view wildcard string */
    public static final String ALL_PRIMARY_VIEWS = "*" + PRIMARY_VIEW;

    /** String to use when dealing with a language in a view */
    public static final String LANGUAGE_VIEW_NAME = "Language";
    /** {@link #LANGUAGE_VIEW_NAME} prefixed with the "." separator */
    public static final String LANGUAGE_VIEW = "." + LANGUAGE_VIEW_NAME;

    /** Language wildcard string */
    public static final String ALL_LANGUAGE_VIEWS = "*" + LANGUAGE_VIEW;

    /** Alternate view wildcard string */
    public static final String ALL_ALT_VIEWS = "*.AlternateView";

    /** Metadata view name */
    public static final String METADATA_VIEW_NAME = "Metadata";
    /** {@link #METADATA_VIEW_NAME} prefixed with the "." separator */
    public static final String METADATA_VIEW = "." + METADATA_VIEW_NAME;

    /*
     * alternate views to NOT output if only a file type/form is specified. Exact entries are kept in denylist;
     * entries ending in "*" are kept in wildCardDenylist with the trailing "*" removed and are matched as prefixes.
     */
    protected Set<String> denylist = new HashSet<>();
    protected Set<String> wildCardDenylist = new HashSet<>();

    /** Helper assigned during initialize; null until then */
    @Nullable
    protected DropOffUtil dropOffUtil = null;

    /** Regex character-class body substituted for every "%s" in the denylist format strings below */
    protected String denylistAllowedNameChars = "a-zA-Z0-9_\\-";
    /** Template for the filetype part of a denylist entry; "%s" is replaced with the allowed name chars */
    protected String denylistFiletypeFormat = "^[%s]+$";
    /** Compiled form of the filetype template, set by loadNameValidationPatterns */
    protected Pattern denylistFiletypeFormatPattern;
    /** Template for the view name part of a denylist entry; "%s" is replaced with the allowed name chars */
    protected String denylistViewNameFormat = "^[%s]+(\\.[%s]+)?\\*?$";
    /** Compiled form of the view name template, set by loadNameValidationPatterns */
    protected Pattern denylistViewNameFormatPattern;
97  
98      /**
99       * Initialization phase hook for the filter with default preferences for the runtime configuration of the filter
100      */
101     @Override
102     public void initialize(final Configurator theConfigG, final String filterName) {
103         loadFilterConfiguration(null);
104         initialize(theConfigG, filterName, this.filterConfig);
105     }
106 
107     /**
108      * Initialization phase hook for the filter with provided filter configuration
109      * 
110      * @param theConfigG passed in configuration object, usually DropOff's config
111      * @param filterName the configured name of this filter or null for the default
112      * @param theFilterConfig the configuration for the specific filter
113      */
114     @Override
115     public void initialize(final Configurator theConfigG, @Nullable final String filterName,
116             final Configurator theFilterConfig) {
117         this.configG = theConfigG;
118         if (filterName != null) {
119             setFilterName(filterName);
120         }
121         loadFilterConfiguration(theFilterConfig);
122         loadFilterCondition(theConfigG);
123         loadOutputSpec(theConfigG);
124         this.dropOffUtil = new DropOffUtil(theConfigG);
125         initializeOutputTypes(this.filterConfig);
126     }
127 
128     private void loadFilterCondition(final Configurator parentConfig) {
129         this.filterConditionSpec = parentConfig.findStringEntry("FILTER_CONDITION_" + getFilterName(), null);
130 
131         // format FILTER_CONDITION_<filtername> = profilename:clazz just like dropoff filter config
132         if (!StringUtils.isEmpty(filterConditionSpec)) {
133             final String name;
134             final String clazz;
135             Configurator filterConfig = null;
136             final int colpos = filterConditionSpec.indexOf(':');
137             if (colpos > -1) {
138                 name = filterConditionSpec.substring(0, colpos);
139                 clazz = filterConditionSpec.substring(colpos + 1);
140                 final String filterConfigName = parentConfig.findStringEntry(name);
141                 if (filterConfigName != null) {
142                     try {
143                         filterConfig = ConfigUtil.getConfigInfo(filterConfigName);
144                     } catch (IOException configError) {
145                         logger.warn("Specified filter configuration {} cannot be loaded", filterConfigName);
146                         return;
147                     }
148                 }
149             } else {
150                 clazz = filterConditionSpec;
151             }
152 
153             try {
154                 final Object filterConditionObj = emissary.core.Factory.create(clazz);
155 
156                 if (filterConditionObj != null && filterConditionObj instanceof IFilterCondition) {
157                     this.filterCondition = (IFilterCondition) filterConditionObj;
158                     // initialize using the config
159                     filterCondition.initialize(filterConfig);
160                 } else {
161                     logger.warn("Failed to initialize filter condition {}. Filter does not implement IFilterCondition", getFilterName());
162                 }
163             } catch (Throwable t) {
164                 // failed to initialize
165                 logger.error("Failed in initialize filter condition {} with argument {} and message {}", getFilterName(), filterConditionSpec,
166                         t.getMessage(), t);
167             }
168 
169         }
170     }
171 
172     /**
173      * Run custom configuration
174      * 
175      * @param config the filter specific configurator
176      */
177     protected void initializeOutputTypes(@Nullable final Configurator config) {
178         if (config != null) {
179             this.loadNameValidationPatterns(config);
180             this.outputTypes = config.findEntriesAsSet("OUTPUT_TYPE");
181             this.logger.debug("Loaded {} output types for filter {}", this.outputTypes.size(), this.outputTypes);
182             this.initializeDenylist(config);
183         } else {
184             this.logger.debug("InitializeCustom has null filter config");
185         }
186     }
187 
188     protected void loadNameValidationPatterns(final Configurator config) {
189         denylistAllowedNameChars = config.findStringEntry("DENYLIST_ALLOWED_NAME_CHARS", denylistAllowedNameChars);
190         denylistFiletypeFormat = config.findStringEntry("DENYLIST_FILETYPE_FORMAT", denylistFiletypeFormat);
191         denylistFiletypeFormatPattern = Pattern.compile(denylistFiletypeFormat.replace("%s", denylistAllowedNameChars));
192         denylistViewNameFormat = config.findStringEntry("DENYLIST_VIEW_NAME_FORMAT", denylistViewNameFormat);
193         denylistViewNameFormatPattern = Pattern.compile(denylistViewNameFormat.replace("%s", denylistAllowedNameChars));
194     }
195 
196     protected void initializeDenylist(final Configurator config) {
197         for (String entry : config.findEntriesAsSet("DENYLIST")) {
198             String viewName = validateAndRemoveDenylistFiletype(entry);
199             if (matchesDenylistViewNameFormatPattern(viewName)) {
200                 if (viewName.chars().filter(ch -> ch == '.').count() > 0) {
201                     logger.warn("`DENYLIST = \"{}\"` viewName `{}` should not contain any `.` characters", entry, viewName);
202                 }
203 
204                 if (viewName.endsWith("*")) {
205                     String strippedEntry = entry.substring(0, entry.length() - 1);
206                     this.wildCardDenylist.add(strippedEntry);
207                 } else {
208                     this.denylist.add(entry);
209                 }
210 
211             } else {
212                 throw new EmissaryRuntimeException(String.format(
213                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
214                                 "entry `%s` must match pattern `%s`.",
215                         entry, entry, getDenylistViewNameFormat()));
216             }
217         }
218 
219         this.logger.debug("Loaded {} ignorelist types for filter {}", this.denylist.size(), this.denylist);
220         this.logger.debug("Loaded {} wildcard suffix ignorelist types for filter {}", this.wildCardDenylist.size(), this.wildCardDenylist);
221     }
222 
223     protected String validateAndRemoveDenylistFiletype(final String entry) {
224         String[] names = entry.split("\\.", 2);
225 
226         if (names.length > 1) {
227             String filetype = names[0];
228             String viewName = names[1];
229 
230             if (filetype.equals("*")) { // DENYLIST = "*.<viewName>" not allowed
231                 throw new EmissaryRuntimeException(String.format(
232                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
233                                 "wildcarded filetypes not allowed in denylist - Did you mean `DENYLIST = \"%s\"`?",
234                         entry, viewName));
235             } else if (!matchesDenylistFiletypeFormatPattern(filetype)) {
236                 throw new EmissaryRuntimeException(String.format(
237                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
238                                 "filetype `%s` must match pattern `%s`",
239                         entry, filetype, getDenylistFiletypeFormat()));
240             }
241             return viewName;
242         }
243         return entry;
244     }
245 
246     /**
247      * Return the name of this filter
248      * 
249      * @return the string name of the filter
250      */
251     @Override
252     public String getFilterName() {
253         return this.filterName;
254     }
255 
256     /**
257      * Set the filter name
258      * 
259      * @param s the new name to use for this filter instance
260      */
261     @Override
262     public void setFilterName(final String s) {
263         this.filterName = s;
264     }
265 
266     /**
267      * Load the filter configuration with precendence of provided, named, default Preference order for loading
268      * configurations
269      * <ol>
270      * <li>[filter-package].FILTER_NAME.cfg</li>
271      * <li>[filter-package].[filter-class]-FILTER_NAME.cfg</li>
272      * <li>[filter-package].[filter-class].cfg</li>
273      * </ol>
274      * 
275      * @param suppliedFilterConfig configuration to use when not null
276      */
277     protected void loadFilterConfiguration(@Nullable final Configurator suppliedFilterConfig) {
278         if (suppliedFilterConfig != null) {
279             this.filterConfig = suppliedFilterConfig;
280             return;
281         }
282 
283         final List<String> configPreferences = new ArrayList<>();
284 
285         if (getFilterName() != null) {
286             configPreferences.add(this.getClass().getPackage().getName() + "." + getFilterName() + ConfigUtil.CONFIG_FILE_ENDING);
287             configPreferences.add(this.getClass().getName() + "-" + getFilterName() + ConfigUtil.CONFIG_FILE_ENDING);
288         }
289         configPreferences.add(this.getClass().getName() + ConfigUtil.CONFIG_FILE_ENDING);
290 
291         this.logger.debug("Looking for filter configuration preferences {}", configPreferences);
292         try {
293             this.filterConfig = ConfigUtil.getConfigInfo(configPreferences);
294         } catch (IOException iox) {
295             this.logger.debug("Could not find filter configuration for {}", getFilterName(), iox);
296         }
297     }
298 
299     /**
300      * Run the filter for a set of documents
301      * 
302      * @param list collection of IBaseDataObject to run the filter on
303      * @param params map of params
304      * @return status value
305      */
306     @Override
307     public int filter(final List<IBaseDataObject> list, final Map<String, Object> params) {
308         // Important to process them in order, if not already sorted
309 
310         int status = 0;
311         for (final IBaseDataObject d : list) {
312             status = filter(d, params);
313         }
314         return status;
315     }
316 
317     /**
318      * Run the filter for a set of documents
319      * 
320      * @param list collection of IBaseDataObject to run the filter on
321      * @param params map of params
322      * @param output the output stream
323      * @return status value
324      */
325     @Override
326     public int filter(final List<IBaseDataObject> list, final Map<String, Object> params, final OutputStream output) {
327 
328         int status = 0;
329         for (final IBaseDataObject d : list) {
330             status = filter(d, params, output);
331         }
332         return status;
333     }
334 
335     /**
336      * The method that all filter have to provide for stream based output
337      */
338     @Override
339     public int filter(final IBaseDataObject payload, final Map<String, Object> params, final OutputStream output) {
340         throw new IllegalArgumentException("Not supported, override to support");
341     }
342 
343     @Override
344     public boolean isOutputtable(final IBaseDataObject d) {
345         return filterCondition == null || filterCondition.accept(d);
346     }
347 
348     @Override
349     public boolean isOutputtable(final List<IBaseDataObject> list) {
350         return filterCondition == null || filterCondition.accept(list);
351     }
352 
353     /**
354      * Determine if the payload is outputtable by the filter
355      * 
356      * @param d the document
357      * @param params map of params
358      * @return true if the filter wants a crack at outputting this payload
359      */
360     @Override
361     public boolean isOutputtable(final IBaseDataObject d, final Map<String, Object> params) {
362         return true;
363     }
364 
365     /**
366      * Determine if the payload list is outputtable by the filter
367      * 
368      * @param list collection of IBaseDataObject to check for outputtability
369      * @param params map of params
370      * @return true if the filter wants a crack at outputting this payload
371      */
372     @Override
373     public boolean isOutputtable(final List<IBaseDataObject> list, final Map<String, Object> params) {
374         return true;
375     }
376 
377     /**
378      * Determine is this payload should be output by this filter Usually by the primary view or one of the alternate views
379      * being on the outputTypes list from the run-time type configuration stream for the filter in questin.
380      *
381      * @param type of the data
382      */
383     protected boolean isOutputtable(final String type) {
384         return this.outputTypes.contains("*") || this.outputTypes.contains(type);
385     }
386 
387     /**
388      * Determine is this payload should be output by this filter Usually by the primary view or one of the alternate views
389      * being on the outputTypes list from the run-time type configuration stream for the filter in question.
390      *
391      * @param types types to check
392      * @return true if any one of the types is outputtable
393      */
394     protected boolean isOutputtable(final Collection<String> types) {
395         if (this.outputTypes.contains("*")) {
396             this.logger.debug("Outputtable due to wildcard in output types");
397             return true;
398         }
399 
400         final boolean canOutput = !Collections.disjoint(this.outputTypes, types);
401         if (canOutput && this.logger.isDebugEnabled()) {
402             final Set<String> outputFor = new HashSet<>();
403             for (final String s : this.outputTypes) {
404                 if (types.contains(s)) {
405                     outputFor.add(s);
406                 }
407             }
408             this.logger.debug("Outputtable due to non-disjoint sets: {}", outputFor);
409         }
410         return canOutput;
411     }
412 
413     /**
414      * Close the filter
415      */
416     @Override
417     public void close() {
418         // nothing to do
419     }
420 
421     /*
422      * Extract my Output Spec from the supplied config info and save it
423      */
424     protected void loadOutputSpec(final Configurator theConfigG) {
425         this.outputSpec = theConfigG.findStringEntry("OUTPUT_SPEC_" + getFilterName(), null);
426         this.errorSpec = theConfigG.findStringEntry("ERROR_SPEC_" + getFilterName(), null);
427         this.logger.debug("Output spec for {} is {}", getFilterName(), this.outputSpec);
428     }
429 
430     /**
431      * Get bytes as UTF-8 converted from specified charset
432      * 
433      * @param value the contents
434      * @param charset the charset of the bytes in value
435      */
436     protected String normalizeBytes(final byte[] value, final String charset) {
437         return normalizeBytes(value, 0, value.length, charset);
438     }
439 
440     /**
441      * Get bytes as UTF-8 converted from specified charset
442      * 
443      * @param value the contents
444      * @param start position to start subarray
445      * @param len length of subarray
446      * @param charset the charset of the bytes in value
447      */
448     protected String normalizeBytes(final byte[] value, final int start, final int len, @Nullable final String charset) {
449         String s = null;
450 
451         if (charset != null) {
452             try {
453                 s = new String(value, start, len, charset);
454             } catch (UnsupportedEncodingException ex) {
455                 this.logger.debug("Error encoding string", ex);
456             }
457         }
458 
459         if (s == null) {
460             // from exception or no charset
461             s = new String(value, start, len);
462         }
463 
464         return s;
465     }
466 
467     /**
468      * Extract the charset from the payload or defaultCharset
469      * 
470      * @param d the payload
471      * @param defaultCharset the default
472      * @return the charset or defualtCharset if none
473      */
474     protected String getCharset(final IBaseDataObject d, final String defaultCharset) {
475         String lang = d.getFontEncoding();
476         if (lang == null || lang.toUpperCase(Locale.getDefault()).contains("ASCII") || lang.toUpperCase(Locale.getDefault()).contains("8859-1")) {
477             final String s = d.getStringParameter("HTML_CHARSET");
478             if (s != null) {
479                 lang = s;
480             }
481         }
482         if (lang == null || lang.toUpperCase(Locale.getDefault()).contains("ASCII") || lang.toUpperCase(Locale.getDefault()).contains("8859-1")) {
483             final String s = d.getStringParameter("MIME_CHARSET");
484             if (s != null) {
485                 lang = s;
486             }
487         }
488         if (lang == null) {
489             return defaultCharset;
490         } else {
491             return JavaCharSet.get(lang);
492         }
493     }
494 
495     /**
496      * Makes a set of the file type, current form with and without the .PrimaryView qualifier and all the alternate view
497      * names Result set can be passed to {@link #isOutputtable(Collection)} for checking
498      *
499      * @param d the payload
500      */
501     protected Set<String> getTypesToCheck(final IBaseDataObject d) {
502         final Set<String> checkTypes = getPrimaryTypesToCheck(d);
503         for (final String viewName : d.getAlternateViewNames()) {
504             checkTypes.addAll(getTypesToCheckForNamedView(d, viewName));
505         }
506         checkTypes.addAll(getTypesToCheckForNamedView(d, METADATA_VIEW_NAME));
507         checkTypes.add(ALL_ALT_VIEWS);
508         return checkTypes;
509     }
510 
511     protected Set<String> getTypesToCheckForNamedView(final IBaseDataObject d, final String viewName) {
512         final Set<String> checkTypes = new HashSet<>();
513         final String lang = this.dropOffUtil.getLanguage(d);
514         final String fileType = DropOffUtil.getFileType(d);
515         final String currentForm = d.currentForm();
516 
517         // skip over denylisted alt views
518         if (denyListContains(fileType, viewName)) {
519             return checkTypes;
520         }
521 
522         checkTypes.add(fileType);
523         checkTypes.add(fileType + "." + viewName);
524         checkTypes.add("*." + viewName);
525 
526         if (!"NONE".equals(lang)) {
527             checkTypes.add(lang);
528             checkTypes.add(lang + "." + viewName);
529             checkTypes.add(lang + "." + fileType);
530             checkTypes.add(lang + "." + fileType + "." + viewName);
531         }
532 
533         if (currentForm != null && !fileType.equals(currentForm)) {
534             checkTypes.add(currentForm);
535             checkTypes.add(currentForm + "." + viewName);
536             if (!"NONE".equals(lang)) {
537                 checkTypes.add(lang + "." + currentForm);
538                 checkTypes.add(lang + "." + currentForm + "." + viewName);
539             }
540         }
541         this.logger.debug("Types to be checked for named view {}: {}", viewName, checkTypes);
542         return checkTypes;
543     }
544 
545     protected boolean denyListContains(final String fileType, final String viewName) {
546         String fullName = fileType + "." + viewName;
547         if (this.denylist.contains(viewName) || this.denylist.contains(fullName)) {
548             return true;
549         }
550         return this.wildCardDenylist.stream().anyMatch(i -> viewName.startsWith(i) || fullName.startsWith(i));
551     }
552 
553     /**
554      * Makes a set of the file type, current form with and without the .PrimaryView qualifier. Result set can be passed to
555      * {@link #isOutputtable(Collection)} for checking whether the primary view should be output
556      *
557      * @param d the payload
558      */
559     protected Set<String> getPrimaryTypesToCheck(final IBaseDataObject d) {
560         final Set<String> checkTypes = getTypesToCheckForNamedView(d, PRIMARY_VIEW_NAME);
561         final String lang = this.dropOffUtil.getLanguage(d);
562         checkTypes.add(lang + LANGUAGE_VIEW);
563         checkTypes.add(ALL_LANGUAGE_VIEWS);
564         checkTypes.add(ALL_PRIMARY_VIEWS);
565         return checkTypes;
566     }
567 
568     @Override
569     public String getOutputSpec() {
570         return this.outputSpec;
571     }
572 
573     @Override
574     public String getErrorSpec() {
575         return this.errorSpec;
576     }
577 
578     @Override
579     public Collection<String> getOutputTypes() {
580         return new HashSet<>(this.outputTypes);
581     }
582 
583     public boolean matchesDenylistFiletypeFormatPattern(String str) {
584         return denylistFiletypeFormatPattern.matcher(str).matches();
585     }
586 
587     public String getDenylistFiletypeFormat() {
588         return denylistFiletypeFormatPattern.pattern();
589     }
590 
591     public boolean matchesDenylistViewNameFormatPattern(String str) {
592         return denylistViewNameFormatPattern.matcher(str).matches();
593     }
594 
595     public String getDenylistViewNameFormat() {
596         return denylistViewNameFormatPattern.pattern();
597     }
598 }