package com.oracle.truffle.regex.charset;
import static com.oracle.truffle.regex.util.BitSets.highByte;
import static com.oracle.truffle.regex.util.BitSets.lowByte;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
import com.oracle.truffle.regex.tregex.matchers.AnyMatcher;
import com.oracle.truffle.regex.tregex.matchers.BitSetMatcher;
import com.oracle.truffle.regex.tregex.matchers.CharMatcher;
import com.oracle.truffle.regex.tregex.matchers.EmptyMatcher;
import com.oracle.truffle.regex.tregex.matchers.HybridBitSetMatcher;
import com.oracle.truffle.regex.tregex.matchers.InvertibleCharMatcher;
import com.oracle.truffle.regex.tregex.matchers.MultiBitSetMatcher;
import com.oracle.truffle.regex.tregex.matchers.RangeListMatcher;
import com.oracle.truffle.regex.tregex.matchers.RangeTreeMatcher;
import com.oracle.truffle.regex.tregex.matchers.SingleCharMatcher;
import com.oracle.truffle.regex.tregex.matchers.SingleRangeMatcher;
import com.oracle.truffle.regex.tregex.matchers.TwoCharMatcher;
import com.oracle.truffle.regex.util.CompilationFinalBitSet;
/**
 * Static helpers that pick a {@link CharMatcher} implementation for a given {@link CodePointSet},
 * plus utilities for rendering flat range arrays as strings.
 */
public class CharMatchers {

    /**
     * Creates a matcher for the given code point set.
     * <p>
     * If the set matches both the minimum and maximum value of the buffer's encoding, or if its
     * inverse lies within a single high byte, the matcher is built from the inverse of the set and
     * flagged as inverted; otherwise it is built from the set as-is.
     *
     * @param cps the code point set to match.
     * @param compilationBuffer buffer providing the encoding and scratch data structures.
     * @return a matcher selected according to the shape of {@code cps}.
     */
    public static CharMatcher createMatcher(CodePointSet cps, CompilationBuffer compilationBuffer) {
        boolean invert = cps.matchesMinAndMax(compilationBuffer.getEncoding()) || cps.inverseIsSameHighByte(compilationBuffer.getEncoding());
        CodePointSet effectiveSet = invert ? cps.createInverse(compilationBuffer.getEncoding()) : cps;
        return createMatcher(effectiveSet, compilationBuffer, invert);
    }

    /**
     * Selects a matcher implementation by testing the set against a fixed sequence of cases; the
     * first case that applies wins, so the order of the checks below is significant.
     *
     * @param cps the (possibly already inverted) code point set.
     * @param compilationBuffer buffer providing the encoding and scratch data structures.
     * @param inverse flag passed through to the created matcher.
     */
    private static CharMatcher createMatcher(CodePointSet cps, CompilationBuffer compilationBuffer, boolean inverse) {
        if (cps.isEmpty()) {
            return EmptyMatcher.create(inverse);
        }
        if (cps.matchesEverything(compilationBuffer.getEncoding())) {
            return AnyMatcher.create(inverse);
        }
        if (cps.matchesSingleChar()) {
            return SingleCharMatcher.create(inverse, cps.getMin());
        }
        if (cps.valueCountEquals(2)) {
            return TwoCharMatcher.create(inverse, cps.getMin(), cps.getMax());
        }
        // number of ranges contained in the set
        final int rangeCount = cps.size();
        if (rangeCount == 1) {
            return SingleRangeMatcher.create(inverse, cps.getMin(), cps.getMax());
        }
        if (preferRangeListMatcherOverBitSetMatcher(cps, rangeCount)) {
            return RangeListMatcher.create(inverse, cps.toArray());
        }
        // all values share one high byte, so a single bit set indexed by low byte is enough
        if (highByte(cps.getMin()) == highByte(cps.getMax())) {
            return convertToBitSetMatcher(cps, compilationBuffer, inverse);
        }
        if (rangeCount > 100 && cps.getMax() <= 0xffff) {
            return MultiBitSetMatcher.fromRanges(inverse, cps);
        }
        return createCompressedMatcher(cps, compilationBuffer, inverse);
    }

    /**
     * Fallback case of matcher selection: compresses the set and chooses a hybrid bit set matcher
     * if the compressed form has bit sets, a range list matcher if it has at most 10 entries, and
     * a range tree matcher otherwise.
     */
    private static CharMatcher createCompressedMatcher(CodePointSet cps, CompilationBuffer compilationBuffer, boolean inverse) {
        CompressedCodePointSet compressed = CompressedCodePointSet.create(cps, compilationBuffer);
        if (compressed.hasBitSets()) {
            return HybridBitSetMatcher.create(inverse, compressed);
        }
        if (compressed.size() <= 10) {
            return RangeListMatcher.create(inverse, compressed.getRanges());
        }
        return RangeTreeMatcher.fromRanges(inverse, compressed.getRanges());
    }

    /**
     * Decides whether a range list matcher should be chosen before any bit set based matcher is
     * considered: true when the set has at most two ranges or {@code cps.valueCountMax(4)} holds.
     * NOTE(review): the thresholds are presumably performance-motivated; confirm before changing.
     */
    private static boolean preferRangeListMatcherOverBitSetMatcher(CodePointSet cps, int size) {
        if (size <= 2) {
            return true;
        }
        return cps.valueCountMax(4);
    }

    /**
     * Builds a {@link BitSetMatcher} for a set whose ranges all share the same high byte. The low
     * bytes of every range are marked in the buffer's byte-sized bit set, which is then exported
     * as a long array for the matcher.
     */
    private static InvertibleCharMatcher convertToBitSetMatcher(CodePointSet cps, CompilationBuffer compilationBuffer, boolean inverse) {
        final int commonHighByte = highByte(cps.getMin());
        final CompilationFinalBitSet lowByteBits = compilationBuffer.getByteSizeBitSet();
        for (int rangeIndex = 0; rangeIndex < cps.size(); rangeIndex++) {
            final int lo = cps.getLo(rangeIndex);
            final int hi = cps.getHi(rangeIndex);
            // callers must only pass sets that stay within a single high byte
            assert highByte(lo) == commonHighByte && highByte(hi) == commonHighByte;
            lowByteBits.setRange(lowByte(lo), lowByte(hi));
        }
        return BitSetMatcher.create(inverse, commonHighByte, lowByteBits.toLongArray());
    }

    /**
     * Renders the given range array using the non-numeric format.
     *
     * @param ranges flat array read as consecutive (lo, hi) pairs.
     * @see #rangesToString(int[], boolean)
     */
    @TruffleBoundary
    public static String rangesToString(int[] ranges) {
        return rangesToString(ranges, false);
    }

    /**
     * Renders the given range array as a string, one entry per (lo, hi) pair, with no separator
     * between entries.
     *
     * @param ranges flat array read as consecutive (lo, hi) pairs.
     * @param numeric if true, every pair is printed as {@code [lo-hi]} using decimal integers;
     *            otherwise formatting is delegated to {@code Range.toString(lo, hi)}.
     */
    @TruffleBoundary
    public static String rangesToString(int[] ranges, boolean numeric) {
        final StringBuilder out = new StringBuilder();
        int pos = 0;
        while (pos < ranges.length) {
            final int lo = ranges[pos];
            final int hi = ranges[pos + 1];
            if (!numeric) {
                out.append(Range.toString(lo, hi));
            } else {
                out.append("[").append(lo).append("-").append(hi).append("]");
            }
            pos += 2;
        }
        return out.toString();
    }
}