// JavaCharSet.java

package emissary.util;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Convert any charset to a Java Charset name that a JDK 1.1 and up will understand. Just returns ones that need
 * remapping. Callers should be advised that getting a null back means the code set passed might be valid *as is* or it
 * might be completely bogus. We currently cannot tell.
 *
 * <p>
 * Lookups are case-insensitive on the caller's side: the requested name is upper-cased with {@link Locale#ROOT} before
 * being looked up, so the keys supplied to {@link #initialize(Map)} are expected to be upper case.
 *
 * <p>
 * NOTE(review): {@link #get(String)} reads the mapping table without synchronization while {@link #initialize(Map)}
 * writes to it under the class lock. This looks safe only if initialization finishes before lookups start - confirm
 * against callers.
 */
public class JavaCharSet {

    /** Mapping from (upper-case) charset label to the Java charset name to use instead. */
    private static final Map<String, String> charsets = new HashMap<>();

    @SuppressWarnings("NonFinalStaticField")
    private static boolean initialized = false;

    /**
     * Add the supplied mappings to the lookup table and mark this class as initialized. Existing entries are kept
     * unless a supplied key replaces them.
     *
     * @param mappings charset label to Java charset name entries to add
     */
    public static synchronized void initialize(final Map<String, String> mappings) {
        charsets.putAll(mappings);
        initialized = true;
    }

    /**
     * Return initialization status
     *
     * @return true once {@link #initialize(Map)} has been called
     */
    public static synchronized boolean isInitialized() {
        return initialized;
    }

    /**
     * Look up the encoding and return the Java CharSet for it if different from the string passed in.
     *
     * <p>
     * The lookup proceeds in this order, stopping at the first hit:
     * <ol>
     * <li>the whole name, upper-cased;</li>
     * <li>the text found between the first {@code (} and the following {@code )}, if any;</li>
     * <li>the whole name with trailing {@code -<TAG>} segments removed one at a time;</li>
     * <li>the parenthesised text with trailing {@code -<TAG>} segments removed one at a time.</li>
     * </ol>
     * If nothing matches, the parenthesised text (upper-cased, with its {@code -<TAG>} segments stripped) is returned
     * when there was one, otherwise {@code null}.
     *
     * @param cs the charset label to translate, may be null
     * @return the mapped Java charset name, the parenthesised encoding as a last resort, or null
     */
    @Nullable
    public static String get(@Nullable final String cs) {
        if (cs == null) {
            return null;
        }

        // Case-fold with Locale.ROOT: charset labels are ASCII identifiers and a
        // locale-sensitive fold (e.g. Turkish dotted/dotless i) would corrupt the key.
        String s = cs.toUpperCase(Locale.ROOT);
        String charSet = charsets.get(s);

        String enc = null;

        // If nothing look for an encoding inside a set of parens
        if (charSet == null) {
            final int start = s.indexOf('(');
            // Search for the closing paren only after the opening one so that a
            // stray ')' earlier in the string does not hide the real pair.
            final int stop = (start > -1) ? s.indexOf(')', start + 1) : -1;
            if (stop > start) {
                enc = s.substring(start + 1, stop);
                charSet = charsets.get(enc);
            }
        }

        // If nothing, clean the encoding tag and use it
        // ID phase can add -<TAG> things to just about
        // any encoding. It doesn't change the base value
        // of the characters to strip it out since we should
        // be processing those out anyway
        while (charSet == null && s.contains("-")) {
            s = s.substring(0, s.lastIndexOf('-'));
            charSet = charsets.get(s);
        }

        // Finally, try just the encoding stripped of -<TAG>, etc.
        while (charSet == null && enc != null && enc.contains("-")) {
            enc = enc.substring(0, enc.lastIndexOf('-'));
            charSet = charsets.get(enc);
        }

        // Use the supplied encoding as a last resort, may be null
        return (charSet != null) ? charSet : enc;
    }

    /** This class is not meant to be instantiated. */
    private JavaCharSet() {}
}