CharacterCounterSet.java
package emissary.util;
import java.util.List;
/**
* A set of named counters for keeping counts on various classes of characters encountered
*/
public class CharacterCounterSet extends CounterSet {
private static final long serialVersionUID = -7111758159975960091L;
public static final List<String> CHARACTER_TYPE_KEYS =
List.of("CHARACTER_LETTER", "CHARACTER_DIGIT", "CHARACTER_BLANK_SPACE", "CHARACTER_ISO_CONTROL",
"CHARACTER_PUNCTUATION", "CHARACTER_OTHER");
/**
* Create a set of character counters
*/
public CharacterCounterSet() {
loadCharacterKeys();
}
/**
* Create a set of character counters
*
* @param initialCapacity the hash map initial capacity
*/
public CharacterCounterSet(int initialCapacity) {
super(initialCapacity);
loadCharacterKeys();
}
/**
* Create a set of character counters
*
* @param initialCapacity the hash map initial capacity
* @param loadFactor the hash map load factor
*/
public CharacterCounterSet(int initialCapacity, float loadFactor) {
super(initialCapacity, loadFactor);
loadCharacterKeys();
}
/**
* Load in our special character class keys
*/
protected void loadCharacterKeys() {
addKeys(CHARACTER_TYPE_KEYS);
}
/**
* Easy access to letters
*/
public int getLetterCount() {
return get("CHARACTER_LETTER");
}
/**
* Easy access to digits
*/
public int getDigitCount() {
return get("CHARACTER_DIGIT");
}
/**
* Easy access to blank space
*/
public int getBlankSpaceCount() {
return get("CHARACTER_BLANK_SPACE");
}
/**
*
* Easy access to control
*/
public int getIsoControlCount() {
return get("CHARACTER_ISO_CONTROL");
}
/**
* Easy access to punctuation
*/
public int getPunctuationCount() {
return get("CHARACTER_PUNCTUATION");
}
/**
* Easy access to other
*/
public int getOtherCount() {
return get("CHARACTER_OTHER");
}
/**
* Count the characters in s by class. This works by codepoint and handles codepoints beyond the BMP
*
* @param s the string to perform the count on
*/
public void count(String s) {
int cpc = s.codePointCount(0, s.length());
for (int i = 0; i < cpc; i++) {
int offset = s.offsetByCodePoints(0, i);
int cp = s.codePointAt(offset);
int[] cpa = {cp};
String scp = new String(cpa, 0, 1);
if (Character.isLetter(cp)) {
increment("CHARACTER_LETTER");
} else if (Character.isDigit(cp)) {
increment("CHARACTER_DIGIT");
} else if (Character.isSpaceChar(cp)) {
increment("CHARACTER_BLANK_SPACE");
} else if (Character.isISOControl(cp)) {
increment("CHARACTER_ISO_CONTROL");
} else if (scp
.matches("[\\p{Punct}\\p{InGeneralPunctuation}\\u00a1-\\u00bf\\uff01-\\uff0f\\uff1a-\\uff20\\uff38-\\uff40\\uff5b-\\uff60]")) {
increment("CHARACTER_PUNCTUATION");
} else {
increment("CHARACTER_OTHER");
}
}
}
}