View Javadoc
1   package emissary.util;
2   
3   import jakarta.annotation.Nullable;
4   
5   import java.util.HashMap;
6   import java.util.Locale;
7   import java.util.Map;
8   
9   /**
10   * Convert any charset to a Java Charset name that a JDK 1.1 and up will understand. Just returns ones that need
11   * remapping. Callers should be advised that getting a null back means the code set passed might be valid *as is* or it
12   * might be completely bogus. We currently cannot tell.
13   */
14  public class JavaCharSet {
15  
16      private static final Map<String, String> charsets = new HashMap<>();
17  
18      @SuppressWarnings("NonFinalStaticField")
19      private static boolean initialized = false;
20  
21      /**
22       * Load hash from config file
23       */
24      public static synchronized void initialize(final Map<String, String> mappings) {
25          charsets.putAll(mappings);
26          initialized = true;
27      }
28  
29      /**
30       * Return initialization status
31       */
32      public static synchronized boolean isInitialized() {
33          return initialized;
34      }
35  
36      /**
37       * Look up the encoding and return the Java CharSet for it if different from the string passed in
38       */
39      @Nullable
40      public static String get(@Nullable final String cs) {
41          if (cs == null) {
42              return null;
43          }
44  
45          // Look up in the hash
46          String s = cs.toUpperCase(Locale.getDefault());
47          String charSet = charsets.get(s);
48  
49          String enc = null;
50  
51          // If nothing look for an encoding inside a set of parens
52          if (charSet == null) {
53              final int start = s.indexOf("(");
54              final int stop = s.indexOf(")");
55              if (start > -1 && stop > start) {
56                  enc = s.substring(start + 1, stop);
57                  charSet = charsets.get(enc);
58              }
59          }
60  
61          // If nothing, clean the encoding tag and use it
62          // ID phase can add -<TAG> things to just about
63          // any encoding. It doesn't change the base value
64          // of the characters to strip it out since we should
65          // be processing those out anyway
66          while (s.contains("-") && charSet == null) {
67              s = s.substring(0, s.lastIndexOf("-"));
68              charSet = charsets.get(s);
69          }
70  
71          // Finally, try just the encoding stripped of -<TAG>, etc.
72          while ((charSet == null) && (enc != null) && enc.contains("-")) {
73              enc = enc.substring(0, enc.lastIndexOf("-"));
74              charSet = charsets.get(enc);
75          }
76  
77          // Use the supplied encoding as a last resort, may be null
78          if (charSet == null) {
79              charSet = enc;
80          }
81  
82          return charSet;
83      }
84  
85      /** This class is not meant to be instantiated. */
86      private JavaCharSet() {}
87  }