View Javadoc
1   package emissary.output.filter;
2   
3   import emissary.config.ConfigUtil;
4   import emissary.config.Configurator;
5   import emissary.core.EmissaryRuntimeException;
6   import emissary.core.IBaseDataObject;
7   import emissary.output.DropOffUtil;
8   import emissary.util.JavaCharSet;
9   
10  import jakarta.annotation.Nullable;
11  import org.apache.commons.lang3.StringUtils;
12  import org.slf4j.Logger;
13  import org.slf4j.LoggerFactory;
14  
15  import java.io.IOException;
16  import java.io.OutputStream;
17  import java.io.UnsupportedEncodingException;
18  import java.util.ArrayList;
19  import java.util.Collection;
20  import java.util.Collections;
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Locale;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.regex.Pattern;
27  
28  /**
29   * Provides the base mechanism for a drop off filter
30   */
31  public abstract class AbstractFilter implements IDropOffFilter {
32      /** A static convenience logger */
33      protected static final Logger slogger = LoggerFactory.getLogger(AbstractFilter.class);
34  
35      /** Instance logger named after the concrete implementation class */
36      protected Logger logger = LoggerFactory.getLogger(this.getClass().getName());
37  
38      /** The parent configurator, stored by initialize */
39      protected Configurator configG;
40  
41      /** The configurator specific to this filter, set by loadFilterConfiguration */
42      protected Configurator filterConfig;
43  
44      /** The filter name; implementations should set this, the default is "Abstract" */
45      protected String filterName = "Abstract";
46  
47      /** Value of the OUTPUT_SPEC_ entry for this filter name in the parent configurator, null when absent */
48      protected String outputSpec;
49  
50      /** Value of the ERROR_SPEC_ entry for this filter name in the parent configurator, null when absent */
51      protected String errorSpec;
52  
53      /** Value of the FILTER_CONDITION_ entry for this filter name in the parent configurator, null when absent */
54      protected String filterConditionSpec;
55  
56      /** The filter condition created from filterConditionSpec; null means no condition is applied */
57      private IFilterCondition filterCondition;
58  
59      /**
60       * A set of FileType and FileType.ViewName strings controlling what can be output by this filter
61       */
62      protected Set<String> outputTypes = Collections.emptySet();
63  
64      /** String to use when dealing with the primary view specifically */
65      public static final String PRIMARY_VIEW_NAME = "PrimaryView";
66      public static final String PRIMARY_VIEW = "." + PRIMARY_VIEW_NAME;
67  
68      /** Primary view wildcard string */
69      public static final String ALL_PRIMARY_VIEWS = "*" + PRIMARY_VIEW;
70  
71      /** String to use when dealing with a language in a view */
72      public static final String LANGUAGE_VIEW_NAME = "Language";
73      public static final String LANGUAGE_VIEW = "." + LANGUAGE_VIEW_NAME;
74  
75      /** Language wildcard string */
76      public static final String ALL_LANGUAGE_VIEWS = "*" + LANGUAGE_VIEW;
77  
78      /** Alternate view wildcard string */
79      public static final String ALL_ALT_VIEWS = "*.AlternateView";
80  
81      /** Metadata view name */
82      public static final String METADATA_VIEW_NAME = "Metadata";
83      public static final String METADATA_VIEW = "." + METADATA_VIEW_NAME;
84  
85      /* alternate views to NOT output if only a file type/form is specified; filled from the DENYLIST entries */
86      protected Set<String> denylist = new HashSet<>();
87      protected Set<String> wildCardDenylist = new HashSet<>(); // DENYLIST entries that ended in '*', kept without the '*' and matched as prefixes
88  
89      protected Set<String> viewNamePrefixCheckTypes = new HashSet<>(); // replaced by the VIEW_NAME_PREFIX_CHECK_TYPES entries in initializeOutputTypes
90  
91      @Nullable
92      protected DropOffUtil dropOffUtil = null; // created in initialize; null until then
93  
94      protected String denylistAllowedNameChars = "a-zA-Z0-9_\\-"; // substituted for every %s in the two format strings below
95      protected String denylistFiletypeFormat = "^[%s]+$";
96      protected Pattern denylistFiletypeFormatPattern; // compiled in loadNameValidationPatterns; null until then
97      protected String denylistViewNameFormat = "^[%s]+(\\.[%s]+)?\\*?$";
98      protected Pattern denylistViewNameFormatPattern; // compiled in loadNameValidationPatterns; null until then
99  
100     /**
101      * Initialization phase hook for the filter with default preferences for the runtime configuration of the filter
102      */
103     @Override
104     public void initialize(final Configurator theConfigG, final String filterName) {
105         loadFilterConfiguration(null);
106         initialize(theConfigG, filterName, this.filterConfig);
107     }
108 
109     /**
110      * Initialization phase hook for the filter with provided filter configuration
111      * 
112      * @param theConfigG passed in configuration object, usually DropOff's config
113      * @param filterName the configured name of this filter or null for the default
114      * @param theFilterConfig the configuration for the specific filter
115      */
116     @Override
117     public void initialize(final Configurator theConfigG, @Nullable final String filterName,
118             final Configurator theFilterConfig) {
119         this.configG = theConfigG;
120         if (filterName != null) {
121             setFilterName(filterName);
122         }
123         loadFilterConfiguration(theFilterConfig);
124         loadFilterCondition(theConfigG);
125         loadOutputSpec(theConfigG);
126         this.dropOffUtil = new DropOffUtil(theConfigG);
127         initializeOutputTypes(this.filterConfig);
128     }
129 
130     private void loadFilterCondition(final Configurator parentConfig) {
131         this.filterConditionSpec = parentConfig.findStringEntry("FILTER_CONDITION_" + getFilterName(), null);
132 
133         // format FILTER_CONDITION_<filtername> = profilename:clazz just like dropoff filter config
134         if (!StringUtils.isEmpty(filterConditionSpec)) {
135             final String name;
136             final String clazz;
137             Configurator filterConfig = null;
138             final int colpos = filterConditionSpec.indexOf(':');
139             if (colpos > -1) {
140                 name = filterConditionSpec.substring(0, colpos);
141                 clazz = filterConditionSpec.substring(colpos + 1);
142                 final String filterConfigName = parentConfig.findStringEntry(name);
143                 if (filterConfigName != null) {
144                     try {
145                         filterConfig = ConfigUtil.getConfigInfo(filterConfigName);
146                     } catch (IOException configError) {
147                         logger.warn("Specified filter configuration {} cannot be loaded", filterConfigName);
148                         return;
149                     }
150                 }
151             } else {
152                 clazz = filterConditionSpec;
153             }
154 
155             try {
156                 final Object filterConditionObj = emissary.core.Factory.create(clazz);
157 
158                 if (filterConditionObj != null && filterConditionObj instanceof IFilterCondition) {
159                     this.filterCondition = (IFilterCondition) filterConditionObj;
160                     // initialize using the config
161                     filterCondition.initialize(filterConfig);
162                 } else {
163                     logger.warn("Failed to initialize filter condition {}. Filter does not implement IFilterCondition", getFilterName());
164                 }
165             } catch (Throwable t) {
166                 // failed to initialize
167                 logger.error("Failed in initialize filter condition {} with argument {} and message {}", getFilterName(), filterConditionSpec,
168                         t.getMessage(), t);
169             }
170 
171         }
172     }
173 
174     /**
175      * Run custom configuration
176      * 
177      * @param config the filter specific configurator
178      */
179     protected void initializeOutputTypes(@Nullable final Configurator config) {
180         if (config != null) {
181             this.loadNameValidationPatterns(config);
182             viewNamePrefixCheckTypes = config.findEntriesAsSet("VIEW_NAME_PREFIX_CHECK_TYPES");
183             this.outputTypes = config.findEntriesAsSet("OUTPUT_TYPE");
184             this.logger.debug("Loaded {} output types for filter {}", this.outputTypes.size(), this.outputTypes);
185             this.initializeDenylist(config);
186         } else {
187             this.logger.debug("InitializeCustom has null filter config");
188         }
189     }
190 
191     protected void loadNameValidationPatterns(final Configurator config) {
192         denylistAllowedNameChars = config.findStringEntry("DENYLIST_ALLOWED_NAME_CHARS", denylistAllowedNameChars);
193         denylistFiletypeFormat = config.findStringEntry("DENYLIST_FILETYPE_FORMAT", denylistFiletypeFormat);
194         denylistFiletypeFormatPattern = Pattern.compile(denylistFiletypeFormat.replace("%s", denylistAllowedNameChars));
195         denylistViewNameFormat = config.findStringEntry("DENYLIST_VIEW_NAME_FORMAT", denylistViewNameFormat);
196         denylistViewNameFormatPattern = Pattern.compile(denylistViewNameFormat.replace("%s", denylistAllowedNameChars));
197     }
198 
199     protected void initializeDenylist(final Configurator config) {
200         for (String entry : config.findEntriesAsSet("DENYLIST")) {
201             String viewName = validateAndRemoveDenylistFiletype(entry);
202             if (matchesDenylistViewNameFormatPattern(viewName)) {
203                 if (viewName.chars().filter(ch -> ch == '.').count() > 0) {
204                     logger.warn("`DENYLIST = \"{}\"` viewName `{}` should not contain any `.` characters", entry, viewName);
205                 }
206 
207                 if (viewName.endsWith("*")) {
208                     String strippedEntry = entry.substring(0, entry.length() - 1);
209                     this.wildCardDenylist.add(strippedEntry);
210                 } else {
211                     this.denylist.add(entry);
212                 }
213 
214             } else {
215                 throw new EmissaryRuntimeException(String.format(
216                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
217                                 "entry `%s` must match pattern `%s`.",
218                         entry, entry, getDenylistViewNameFormat()));
219             }
220         }
221 
222         this.logger.debug("Loaded {} ignorelist types for filter {}", this.denylist.size(), this.denylist);
223         this.logger.debug("Loaded {} wildcard suffix ignorelist types for filter {}", this.wildCardDenylist.size(), this.wildCardDenylist);
224     }
225 
226     protected String validateAndRemoveDenylistFiletype(final String entry) {
227         String[] names = entry.split("\\.", 2);
228 
229         if (names.length > 1) {
230             String filetype = names[0];
231             String viewName = names[1];
232 
233             if (filetype.equals("*")) { // DENYLIST = "*.<viewName>" not allowed
234                 throw new EmissaryRuntimeException(String.format(
235                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
236                                 "wildcarded filetypes not allowed in denylist - Did you mean `DENYLIST = \"%s\"`?",
237                         entry, viewName));
238             } else if (!matchesDenylistFiletypeFormatPattern(filetype)) {
239                 throw new EmissaryRuntimeException(String.format(
240                         "Invalid filter configuration: `DENYLIST = \"%s\"` " +
241                                 "filetype `%s` must match pattern `%s`",
242                         entry, filetype, getDenylistFiletypeFormat()));
243             }
244             return viewName;
245         }
246         return entry;
247     }
248 
249     /**
250      * Return the name of this filter
251      * 
252      * @return the string name of the filter
253      */
254     @Override
255     public String getFilterName() {
256         return this.filterName;
257     }
258 
259     /**
260      * Set the filter name
261      * 
262      * @param s the new name to use for this filter instance
263      */
264     @Override
265     public void setFilterName(final String s) {
266         this.filterName = s;
267     }
268 
269     /**
270      * Load the filter configuration with precendence of provided, named, default Preference order for loading
271      * configurations
272      * <ol>
273      * <li>[filter-package].FILTER_NAME.cfg</li>
274      * <li>[filter-package].[filter-class]-FILTER_NAME.cfg</li>
275      * <li>[filter-package].[filter-class].cfg</li>
276      * </ol>
277      * 
278      * @param suppliedFilterConfig configuration to use when not null
279      */
280     protected void loadFilterConfiguration(@Nullable final Configurator suppliedFilterConfig) {
281         if (suppliedFilterConfig != null) {
282             this.filterConfig = suppliedFilterConfig;
283             return;
284         }
285 
286         final List<String> configPreferences = new ArrayList<>();
287 
288         if (getFilterName() != null) {
289             configPreferences.add(this.getClass().getPackage().getName() + "." + getFilterName() + ConfigUtil.CONFIG_FILE_ENDING);
290             configPreferences.add(this.getClass().getName() + "-" + getFilterName() + ConfigUtil.CONFIG_FILE_ENDING);
291         }
292         configPreferences.add(this.getClass().getName() + ConfigUtil.CONFIG_FILE_ENDING);
293 
294         this.logger.debug("Looking for filter configuration preferences {}", configPreferences);
295         try {
296             this.filterConfig = ConfigUtil.getConfigInfo(configPreferences);
297         } catch (IOException iox) {
298             this.logger.debug("Could not find filter configuration for {}", getFilterName(), iox);
299         }
300     }
301 
302     /**
303      * Run the filter for a set of documents
304      * 
305      * @param list collection of IBaseDataObject to run the filter on
306      * @param params map of params
307      * @return status value
308      */
309     @Override
310     public int filter(final List<IBaseDataObject> list, final Map<String, Object> params) {
311         // Important to process them in order, if not already sorted
312 
313         int status = 0;
314         for (final IBaseDataObject d : list) {
315             status = filter(d, params);
316         }
317         return status;
318     }
319 
320     /**
321      * Run the filter for a set of documents
322      * 
323      * @param list collection of IBaseDataObject to run the filter on
324      * @param params map of params
325      * @param output the output stream
326      * @return status value
327      */
328     @Override
329     public int filter(final List<IBaseDataObject> list, final Map<String, Object> params, final OutputStream output) {
330 
331         int status = 0;
332         for (final IBaseDataObject d : list) {
333             status = filter(d, params, output);
334         }
335         return status;
336     }
337 
338     /**
339      * The method that all filter have to provide for stream based output
340      */
341     @Override
342     public int filter(final IBaseDataObject payload, final Map<String, Object> params, final OutputStream output) {
343         throw new IllegalArgumentException("Not supported, override to support");
344     }
345 
346     @Override
347     public boolean isOutputtable(final IBaseDataObject d) {
348         return filterCondition == null || filterCondition.accept(d);
349     }
350 
351     @Override
352     public boolean isOutputtable(final List<IBaseDataObject> list) {
353         return filterCondition == null || filterCondition.accept(list);
354     }
355 
356     /**
357      * Determine if the payload is outputtable by the filter
358      * 
359      * @param d the document
360      * @param params map of params
361      * @return true if the filter wants a crack at outputting this payload
362      */
363     @Override
364     public boolean isOutputtable(final IBaseDataObject d, final Map<String, Object> params) {
365         return true;
366     }
367 
368     /**
369      * Determine if the payload list is outputtable by the filter
370      * 
371      * @param list collection of IBaseDataObject to check for outputtability
372      * @param params map of params
373      * @return true if the filter wants a crack at outputting this payload
374      */
375     @Override
376     public boolean isOutputtable(final List<IBaseDataObject> list, final Map<String, Object> params) {
377         return true;
378     }
379 
380     /**
381      * Determine is this payload should be output by this filter Usually by the primary view or one of the alternate views
382      * being on the outputTypes list from the run-time type configuration stream for the filter in questin.
383      *
384      * @param type of the data
385      */
386     protected boolean isOutputtable(final String type) {
387         return this.outputTypes.contains("*") || this.outputTypes.contains(type);
388     }
389 
390     /**
391      * Determine is this payload should be output by this filter Usually by the primary view or one of the alternate views
392      * being on the outputTypes list from the run-time type configuration stream for the filter in question.
393      *
394      * @param types types to check
395      * @return true if any one of the types is outputtable
396      */
397     protected boolean isOutputtable(final Collection<String> types) {
398         if (this.outputTypes.contains("*")) {
399             this.logger.debug("Outputtable due to wildcard in output types");
400             return true;
401         }
402 
403         final boolean canOutput = !Collections.disjoint(this.outputTypes, types);
404         if (canOutput && this.logger.isDebugEnabled()) {
405             final Set<String> outputFor = new HashSet<>();
406             for (final String s : this.outputTypes) {
407                 if (types.contains(s)) {
408                     outputFor.add(s);
409                 }
410             }
411             this.logger.debug("Outputtable due to non-disjoint sets: {}", outputFor);
412         }
413         return canOutput;
414     }
415 
416     /**
417      * Close the filter
418      */
419     @Override
420     public void close() {
421         // nothing to do
422     }
423 
424     /*
425      * Extract my Output Spec from the supplied config info and save it
426      */
427     protected void loadOutputSpec(final Configurator theConfigG) {
428         this.outputSpec = theConfigG.findStringEntry("OUTPUT_SPEC_" + getFilterName(), null);
429         this.errorSpec = theConfigG.findStringEntry("ERROR_SPEC_" + getFilterName(), null);
430         this.logger.debug("Output spec for {} is {}", getFilterName(), this.outputSpec);
431     }
432 
433     /**
434      * Get bytes as UTF-8 converted from specified charset
435      * 
436      * @param value the contents
437      * @param charset the charset of the bytes in value
438      */
439     protected String normalizeBytes(final byte[] value, final String charset) {
440         return normalizeBytes(value, 0, value.length, charset);
441     }
442 
443     /**
444      * Get bytes as UTF-8 converted from specified charset
445      * 
446      * @param value the contents
447      * @param start position to start subarray
448      * @param len length of subarray
449      * @param charset the charset of the bytes in value
450      */
451     protected String normalizeBytes(final byte[] value, final int start, final int len, @Nullable final String charset) {
452         String s = null;
453 
454         if (charset != null) {
455             try {
456                 s = new String(value, start, len, charset);
457             } catch (UnsupportedEncodingException ex) {
458                 this.logger.debug("Error encoding string", ex);
459             }
460         }
461 
462         if (s == null) {
463             // from exception or no charset
464             s = new String(value, start, len);
465         }
466 
467         return s;
468     }
469 
470     /**
471      * Extract the charset from the payload or defaultCharset
472      * 
473      * @param d the payload
474      * @param defaultCharset the default
475      * @return the charset or defualtCharset if none
476      */
477     protected String getCharset(final IBaseDataObject d, final String defaultCharset) {
478         String lang = d.getFontEncoding();
479         if (lang == null || lang.toUpperCase(Locale.getDefault()).contains("ASCII") || lang.toUpperCase(Locale.getDefault()).contains("8859-1")) {
480             final String s = d.getStringParameter("HTML_CHARSET");
481             if (s != null) {
482                 lang = s;
483             }
484         }
485         if (lang == null || lang.toUpperCase(Locale.getDefault()).contains("ASCII") || lang.toUpperCase(Locale.getDefault()).contains("8859-1")) {
486             final String s = d.getStringParameter("MIME_CHARSET");
487             if (s != null) {
488                 lang = s;
489             }
490         }
491         if (lang == null) {
492             return defaultCharset;
493         } else {
494             return JavaCharSet.get(lang);
495         }
496     }
497 
498     /**
499      * Makes a set of the file type, current form with and without the .PrimaryView qualifier and all the alternate view
500      * names Result set can be passed to {@link #isOutputtable(Collection)} for checking
501      *
502      * @param d the payload
503      */
504     protected Set<String> getTypesToCheck(final IBaseDataObject d) {
505         final Set<String> checkTypes = getPrimaryTypesToCheck(d);
506         for (final String viewName : d.getAlternateViewNames()) {
507             checkTypes.addAll(getTypesToCheckForNamedView(d, viewName));
508         }
509         checkTypes.addAll(getTypesToCheckForNamedView(d, METADATA_VIEW_NAME));
510         checkTypes.add(ALL_ALT_VIEWS);
511         return checkTypes;
512     }
513 
514     protected Set<String> getTypesToCheckForNamedView(final IBaseDataObject d, final String viewName) {
515         final Set<String> checkTypes = new HashSet<>();
516         final String lang = this.dropOffUtil.getLanguage(d);
517         final String fileType = DropOffUtil.getFileType(d);
518         final String currentForm = d.currentForm();
519 
520         // skip over denylisted alt views
521         if (denyListContains(fileType, viewName)) {
522             return checkTypes;
523         }
524 
525         addViewNamePrefixCheckTypes(viewName, fileType, checkTypes);
526 
527         checkTypes.add(fileType);
528         checkTypes.add(fileType + "." + viewName);
529         checkTypes.add("*." + viewName);
530 
531         if (!"NONE".equals(lang)) {
532             checkTypes.add(lang);
533             checkTypes.add(lang + "." + viewName);
534             checkTypes.add(lang + "." + fileType);
535             checkTypes.add(lang + "." + fileType + "." + viewName);
536         }
537 
538         if (currentForm != null && !fileType.equals(currentForm)) {
539             checkTypes.add(currentForm);
540             checkTypes.add(currentForm + "." + viewName);
541             if (!"NONE".equals(lang)) {
542                 checkTypes.add(lang + "." + currentForm);
543                 checkTypes.add(lang + "." + currentForm + "." + viewName);
544             }
545         }
546         this.logger.debug("Types to be checked for named view {}: {}", viewName, checkTypes);
547         return checkTypes;
548     }
549 
550     protected boolean denyListContains(final String fileType, final String viewName) {
551         String fullName = fileType + "." + viewName;
552         if (this.denylist.contains(viewName) || this.denylist.contains(fullName)) {
553             return true;
554         }
555         return this.wildCardDenylist.stream().anyMatch(i -> viewName.startsWith(i) || fullName.startsWith(i));
556     }
557 
558     /**
559      * If the view name starts with the view name prefix, add the view name prefix to checkTypes
560      * 
561      * @param viewName view name
562      * @param fileType the file type
563      * @param checkTypes compare types with the configured output types
564      */
565     protected void addViewNamePrefixCheckTypes(final String viewName, String fileType, Set<String> checkTypes) {
566         this.viewNamePrefixCheckTypes.stream().filter(viewName::startsWith).forEach(viewNamePrefix -> {
567             checkTypes.add(fileType + "." + viewNamePrefix);
568             checkTypes.add("*." + viewNamePrefix);
569         });
570     }
571 
572     /**
573      * Makes a set of the file type, current form with and without the .PrimaryView qualifier. Result set can be passed to
574      * {@link #isOutputtable(Collection)} for checking whether the primary view should be output
575      *
576      * @param d the payload
577      */
578     protected Set<String> getPrimaryTypesToCheck(final IBaseDataObject d) {
579         final Set<String> checkTypes = getTypesToCheckForNamedView(d, PRIMARY_VIEW_NAME);
580         final String lang = this.dropOffUtil.getLanguage(d);
581         checkTypes.add(lang + LANGUAGE_VIEW);
582         checkTypes.add(ALL_LANGUAGE_VIEWS);
583         checkTypes.add(ALL_PRIMARY_VIEWS);
584         return checkTypes;
585     }
586 
587     @Override
588     public String getOutputSpec() {
589         return this.outputSpec;
590     }
591 
592     @Override
593     public String getErrorSpec() {
594         return this.errorSpec;
595     }
596 
597     @Override
598     public Collection<String> getOutputTypes() {
599         return new HashSet<>(this.outputTypes);
600     }
601 
602     public boolean matchesDenylistFiletypeFormatPattern(String str) {
603         return denylistFiletypeFormatPattern.matcher(str).matches();
604     }
605 
606     public String getDenylistFiletypeFormat() {
607         return denylistFiletypeFormatPattern.pattern();
608     }
609 
610     public boolean matchesDenylistViewNameFormatPattern(String str) {
611         return denylistViewNameFormatPattern.matcher(str).matches();
612     }
613 
614     public String getDenylistViewNameFormat() {
615         return denylistViewNameFormatPattern.pattern();
616     }
617 }