package jflex.core.unicode;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jflex.chars.Interval;
public class UnicodeProperties {
public static final String UNICODE_VERSIONS =
"1.1, 1.1.5, 2, 2.0, 2.0.14, 2.1, 2.1.9, 3, 3.0, 3.0.1, 3.1, 3.1.0, 3.2, 3.2.0, 4, 4.0, 4.0.1, 4.1, 4.1.0, 5, 5.0, 5.0.0, 5.1, 5.1.0, 5.2, 5.2.0, 6, 6.0, 6.0.0, 6.1, 6.1.0, 6.2, 6.2.0, 6.3, 6.3.0, 7, 7.0, 7.0.0, 8, 8.0, 8.0.0, 9, 9.0, 9.0.0, 10, 10.0, 10.0.0, 11, 11.0, 11.0.0, 12, 12.0, 12.0.0, 12.1, 12.1.0";
private static final String DEFAULT_UNICODE_VERSION = "12.1";
private static final Pattern WORD_SEP_PATTERN = Pattern.compile("[-_\\s()]");
private int maximumCodePoint;
private final Map<String, IntCharSet> propertyValueIntervals = new HashMap<>();
private String caselessMatchPartitions;
private int caselessMatchPartitionSize;
private IntCharSet[] caselessMatches;
public UnicodeProperties() throws UnsupportedUnicodeVersionException {
init(DEFAULT_UNICODE_VERSION);
}
public UnicodeProperties(String version) throws UnsupportedUnicodeVersionException {
init(version);
}
public int getMaximumCodePoint() {
return maximumCodePoint;
}
public IntCharSet getIntCharSet(String propertyValue) {
return propertyValueIntervals.get(normalize(propertyValue));
}
public Set<String> getPropertyValues() {
return propertyValueIntervals.keySet();
}
public IntCharSet getCaselessMatches(int c) {
if (null == caselessMatches) initCaselessMatches();
return caselessMatches[c];
}
private void initCaselessMatches() {
caselessMatches = new IntCharSet[maximumCodePoint + 1];
int[] members = new int[caselessMatchPartitionSize];
for (int index = 0; index < caselessMatchPartitions.length(); ) {
IntCharSet partition = new IntCharSet();
for (int n = 0; n < caselessMatchPartitionSize; ++n) {
int c = caselessMatchPartitions.codePointAt(index);
index += Character.charCount(c);
members[n] = c;
if (c > 0) partition.add(c);
}
if (partition.containsElements()) {
for (int n = 0; n < caselessMatchPartitionSize; ++n) {
if (members[n] > 0) caselessMatches[members[n]] = partition;
}
}
}
}
private void init(String version) throws UnsupportedUnicodeVersionException {
if (Objects.equals(version, "1.1") || Objects.equals(version, "1.1.5")) {
bind(
jflex.core.unicode.data.Unicode_1_1.propertyValues,
jflex.core.unicode.data.Unicode_1_1.intervals,
jflex.core.unicode.data.Unicode_1_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_1_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_1_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_1_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "2")
|| Objects.equals(version, "2.0")
|| Objects.equals(version, "2.0.14")) {
bind(
jflex.core.unicode.data.Unicode_2_0.propertyValues,
jflex.core.unicode.data.Unicode_2_0.intervals,
jflex.core.unicode.data.Unicode_2_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_2_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_2_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_2_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "2.1") || Objects.equals(version, "2.1.9")) {
bind(
jflex.core.unicode.data.Unicode_2_1.propertyValues,
jflex.core.unicode.data.Unicode_2_1.intervals,
jflex.core.unicode.data.Unicode_2_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_2_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_2_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_2_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "3")
|| Objects.equals(version, "3.0")
|| Objects.equals(version, "3.0.1")) {
bind(
jflex.core.unicode.data.Unicode_3_0.propertyValues,
jflex.core.unicode.data.Unicode_3_0.intervals,
jflex.core.unicode.data.Unicode_3_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_3_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_3_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_3_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "3.1") || Objects.equals(version, "3.1.0")) {
bind(
jflex.core.unicode.data.Unicode_3_1.propertyValues,
jflex.core.unicode.data.Unicode_3_1.intervals,
jflex.core.unicode.data.Unicode_3_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_3_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_3_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_3_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "3.2") || Objects.equals(version, "3.2.0")) {
bind(
jflex.core.unicode.data.Unicode_3_2.propertyValues,
jflex.core.unicode.data.Unicode_3_2.intervals,
jflex.core.unicode.data.Unicode_3_2.propertyValueAliases,
jflex.core.unicode.data.Unicode_3_2.maximumCodePoint,
jflex.core.unicode.data.Unicode_3_2.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_3_2.caselessMatchPartitionSize);
} else if (Objects.equals(version, "4")
|| Objects.equals(version, "4.0")
|| Objects.equals(version, "4.0.1")) {
bind(
jflex.core.unicode.data.Unicode_4_0.propertyValues,
jflex.core.unicode.data.Unicode_4_0.intervals,
jflex.core.unicode.data.Unicode_4_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_4_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_4_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_4_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "4.1") || Objects.equals(version, "4.1.0")) {
bind(
jflex.core.unicode.data.Unicode_4_1.propertyValues,
jflex.core.unicode.data.Unicode_4_1.intervals,
jflex.core.unicode.data.Unicode_4_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_4_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_4_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_4_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "5")
|| Objects.equals(version, "5.0")
|| Objects.equals(version, "5.0.0")) {
bind(
jflex.core.unicode.data.Unicode_5_0.propertyValues,
jflex.core.unicode.data.Unicode_5_0.intervals,
jflex.core.unicode.data.Unicode_5_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_5_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_5_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_5_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "5.1") || Objects.equals(version, "5.1.0")) {
bind(
jflex.core.unicode.data.Unicode_5_1.propertyValues,
jflex.core.unicode.data.Unicode_5_1.intervals,
jflex.core.unicode.data.Unicode_5_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_5_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_5_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_5_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "5.2") || Objects.equals(version, "5.2.0")) {
bind(
jflex.core.unicode.data.Unicode_5_2.propertyValues,
jflex.core.unicode.data.Unicode_5_2.intervals,
jflex.core.unicode.data.Unicode_5_2.propertyValueAliases,
jflex.core.unicode.data.Unicode_5_2.maximumCodePoint,
jflex.core.unicode.data.Unicode_5_2.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_5_2.caselessMatchPartitionSize);
} else if (Objects.equals(version, "6")
|| Objects.equals(version, "6.0")
|| Objects.equals(version, "6.0.0")) {
bind(
jflex.core.unicode.data.Unicode_6_0.propertyValues,
jflex.core.unicode.data.Unicode_6_0.intervals,
jflex.core.unicode.data.Unicode_6_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_6_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_6_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_6_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "6.1") || Objects.equals(version, "6.1.0")) {
bind(
jflex.core.unicode.data.Unicode_6_1.propertyValues,
jflex.core.unicode.data.Unicode_6_1.intervals,
jflex.core.unicode.data.Unicode_6_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_6_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_6_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_6_1.caselessMatchPartitionSize);
} else if (Objects.equals(version, "6.2") || Objects.equals(version, "6.2.0")) {
bind(
jflex.core.unicode.data.Unicode_6_2.propertyValues,
jflex.core.unicode.data.Unicode_6_2.intervals,
jflex.core.unicode.data.Unicode_6_2.propertyValueAliases,
jflex.core.unicode.data.Unicode_6_2.maximumCodePoint,
jflex.core.unicode.data.Unicode_6_2.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_6_2.caselessMatchPartitionSize);
} else if (Objects.equals(version, "6.3") || Objects.equals(version, "6.3.0")) {
bind(
jflex.core.unicode.data.Unicode_6_3.propertyValues,
jflex.core.unicode.data.Unicode_6_3.intervals,
jflex.core.unicode.data.Unicode_6_3.propertyValueAliases,
jflex.core.unicode.data.Unicode_6_3.maximumCodePoint,
jflex.core.unicode.data.Unicode_6_3.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_6_3.caselessMatchPartitionSize);
} else if (Objects.equals(version, "7")
|| Objects.equals(version, "7.0")
|| Objects.equals(version, "7.0.0")) {
bind(
jflex.core.unicode.data.Unicode_7_0.propertyValues,
jflex.core.unicode.data.Unicode_7_0.intervals,
jflex.core.unicode.data.Unicode_7_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_7_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_7_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_7_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "8")
|| Objects.equals(version, "8.0")
|| Objects.equals(version, "8.0.0")) {
bind(
jflex.core.unicode.data.Unicode_8_0.propertyValues,
jflex.core.unicode.data.Unicode_8_0.intervals,
jflex.core.unicode.data.Unicode_8_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_8_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_8_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_8_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "9")
|| Objects.equals(version, "9.0")
|| Objects.equals(version, "9.0.0")) {
bind(
jflex.core.unicode.data.Unicode_9_0.propertyValues,
jflex.core.unicode.data.Unicode_9_0.intervals,
jflex.core.unicode.data.Unicode_9_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_9_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_9_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_9_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "10")
|| Objects.equals(version, "10.0")
|| Objects.equals(version, "10.0.0")) {
bind(
jflex.core.unicode.data.Unicode_10_0.propertyValues,
jflex.core.unicode.data.Unicode_10_0.intervals,
jflex.core.unicode.data.Unicode_10_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_10_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_10_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_10_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "11")
|| Objects.equals(version, "11.0")
|| Objects.equals(version, "11.0.0")) {
bind(
jflex.core.unicode.data.Unicode_11_0.propertyValues,
jflex.core.unicode.data.Unicode_11_0.intervals,
jflex.core.unicode.data.Unicode_11_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_11_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_11_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_11_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "12")
|| Objects.equals(version, "12.0")
|| Objects.equals(version, "12.0.0")) {
bind(
jflex.core.unicode.data.Unicode_12_0.propertyValues,
jflex.core.unicode.data.Unicode_12_0.intervals,
jflex.core.unicode.data.Unicode_12_0.propertyValueAliases,
jflex.core.unicode.data.Unicode_12_0.maximumCodePoint,
jflex.core.unicode.data.Unicode_12_0.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_12_0.caselessMatchPartitionSize);
} else if (Objects.equals(version, "12.1") || Objects.equals(version, "12.1.0")) {
bind(
jflex.core.unicode.data.Unicode_12_1.propertyValues,
jflex.core.unicode.data.Unicode_12_1.intervals,
jflex.core.unicode.data.Unicode_12_1.propertyValueAliases,
jflex.core.unicode.data.Unicode_12_1.maximumCodePoint,
jflex.core.unicode.data.Unicode_12_1.caselessMatchPartitions,
jflex.core.unicode.data.Unicode_12_1.caselessMatchPartitionSize);
} else {
throw new UnsupportedUnicodeVersionException();
}
}
private void bind(
String[] propertyValues,
String[] intervals,
String[] propertyValueAliases,
int maximumCodePoint,
String caselessMatchPartitions,
int caselessMatchPartitionSize) {
this.caselessMatchPartitions = caselessMatchPartitions;
this.caselessMatchPartitionSize = caselessMatchPartitionSize;
this.maximumCodePoint = maximumCodePoint;
for (int n = 0; n < propertyValues.length; ++n) {
String propertyValue = propertyValues[n];
String propertyIntervals = intervals[n];
IntCharSet set = new IntCharSet();
for (int index = 0; index < propertyIntervals.length(); ) {
int start = propertyIntervals.codePointAt(index);
index += Character.charCount(start);
int end = propertyIntervals.codePointAt(index);
index += Character.charCount(end);
set.add(new Interval(start, end));
}
propertyValueIntervals.put(propertyValue, set);
if (2 == propertyValue.length()) {
String singleLetter = propertyValue.substring(0, 1);
IntCharSet singleLetterPropValueSet = propertyValueIntervals.get(singleLetter);
if (null == singleLetterPropValueSet) {
singleLetterPropValueSet = new IntCharSet();
propertyValueIntervals.put(singleLetter, singleLetterPropValueSet);
}
singleLetterPropValueSet.add(set);
}
}
for (int n = 0; n < propertyValueAliases.length; n += 2) {
String alias = propertyValueAliases[n];
String propertyValue = propertyValueAliases[n + 1];
IntCharSet targetSet = propertyValueIntervals.get(propertyValue);
if (null != targetSet) {
propertyValueIntervals.put(alias, targetSet);
}
}
bindInvariantIntervals();
}
private void bindInvariantIntervals() {
IntCharSet asciiSet = IntCharSet.ofCharacterRange(0, 0x7F);
propertyValueIntervals.put(normalize("ASCII"), asciiSet);
IntCharSet anySet = IntCharSet.ofCharacterRange(0, maximumCodePoint);
propertyValueIntervals.put(normalize("Any"), anySet);
}
private static String normalize(String identifier) {
if (null == identifier) return identifier;
Matcher matcher = WORD_SEP_PATTERN.matcher(identifier.toLowerCase(Locale.ENGLISH));
return matcher.replaceAll("").replace(':', '=');
}
public static class UnsupportedUnicodeVersionException extends Exception {
private static final long serialVersionUID = -1718158223161422981L;
public UnsupportedUnicodeVersionException() {
super("Supported versions: " + UNICODE_VERSIONS);
}
public UnsupportedUnicodeVersionException(Throwable cause) {
super("Supported versions: " + UNICODE_VERSIONS, cause);
}
}
}