package jflex.core;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import jflex.base.Build;
import jflex.base.IntPair;
import jflex.chars.Interval;
import jflex.core.unicode.CharClasses;
import jflex.core.unicode.IntCharSet;
import jflex.exceptions.GeneratorException;
import jflex.l10n.ErrorMessages;
import jflex.logging.Out;
import jflex.option.Options;
import jflex.state.StateSet;
import jflex.state.StateSetEnumerator;
public final class NFA {
private StateSet[][] table;
private StateSet[] epsilon;
private boolean[] isFinal;
private Action[] action;
private int numStates;
private final int numInput;
private int numLexStates;
private final int estSize;
private CharClasses classes;
private LexScan scanner;
private RegExps regExps;
private final StateSetEnumerator states = new StateSetEnumerator();
private final StateSet tempStateSet = new StateSet();
public NFA(int numInput, int estSize) {
this.numInput = numInput;
this.estSize = estSize;
numStates = 0;
epsilon = new StateSet[estSize];
action = new Action[estSize];
isFinal = new boolean[estSize];
table = new StateSet[estSize][numInput];
}
public NFA(int numInput, LexScan scanner, RegExps regExps, Macros macros, CharClasses classes) {
this(numInput, regExps.NFASize(macros) + 2 * scanner.states.number());
this.scanner = scanner;
this.regExps = regExps;
this.classes = classes;
numLexStates = scanner.states.number();
int new_num = numEntryStates();
ensureCapacity(new_num);
numStates = new_num;
}
public StateSet epsilon(int i) {
return epsilon[i];
}
public int numEntryStates() {
return 2 * (numLexStates + regExps.gen_look_count);
}
public int numInput() {
return numInput;
}
public int numLexStates() {
return numLexStates;
}
public int numStates() {
return numStates;
}
public StateSet reachableStates(int currentState, int nextChar) {
return table[currentState][nextChar];
}
public StateSetEnumerator states() {
return states;
}
public StateSet tempStateSet() {
return tempStateSet;
}
public void addStandaloneRule() {
int start = numStates;
int end = numStates + 1;
for (int c = 0; c < classes.getNumClasses(); c++) addTransition(start, c, end);
for (int i = 0; i < numLexStates * 2; i++) addEpsilonTransition(i, start);
action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE);
isFinal[end] = true;
}
public void addRegExp(int regExpNum) {
if (Build.DEBUG)
Out.debug(
"Adding nfa for regexp " + regExpNum + " :" + Out.NL + regExps.getRegExp(regExpNum));
IntPair nfa = insertNFA(regExps.getRegExp(regExpNum));
List<Integer> lexStates = regExps.getStates(regExpNum);
if (lexStates.isEmpty()) lexStates = scanner.states.getInclusiveStates();
for (Integer stateNum : lexStates) {
if (!regExps.isBOL(regExpNum)) addEpsilonTransition(2 * stateNum, nfa.start());
addEpsilonTransition(2 * stateNum + 1, nfa.start());
}
if (regExps.getLookAhead(regExpNum) != null) {
Action a = regExps.getAction(regExpNum);
if (a.lookAhead() == Action.FINITE_CHOICE) {
insertLookAheadChoices(nfa.end(), a, regExps.getLookAhead(regExpNum));
scanner.actions.remove(a);
} else {
RegExp r1 = regExps.getRegExp(regExpNum);
RegExp r2 = regExps.getLookAhead(regExpNum);
IntPair look = insertNFA(r2);
addEpsilonTransition(nfa.end(), look.start());
action[look.end()] = a;
isFinal[look.end()] = true;
if (a.lookAhead() == Action.GENERAL_LOOK) {
IntPair forward = insertNFA(r1);
IntPair backward = insertNFA(r2.rev());
isFinal[forward.end()] = true;
action[forward.end()] = new Action(Action.FORWARD_ACTION);
isFinal[backward.end()] = true;
action[backward.end()] = new Action(Action.BACKWARD_ACTION);
int entry = 2 * (regExps.getLookEntry(regExpNum) + numLexStates);
addEpsilonTransition(entry, forward.start());
addEpsilonTransition(entry + 1, backward.start());
a.setEntryState(entry);
}
}
} else {
action[nfa.end()] = regExps.getAction(regExpNum);
isFinal[nfa.end()] = true;
}
}
private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
if (lookAhead.type == sym.BAR) {
RegExp2 r = (RegExp2) lookAhead;
insertLookAheadChoices(baseEnd, a, r.r1);
insertLookAheadChoices(baseEnd, a, r.r2);
} else {
int len = SemCheck.length(lookAhead);
if (len >= 0) {
IntPair look = insertNFA(lookAhead);
addEpsilonTransition(baseEnd, look.start());
Action x = a.copyChoice(len);
action[look.end()] = x;
isFinal[look.end()] = true;
scanner.actions.add(x);
} else {
throw new RegExpException(lookAhead);
}
}
}
private void ensureCapacity(int newNumStates) {
int oldLength = epsilon.length;
if (newNumStates < oldLength) return;
int newStatesLength = Math.max(oldLength * 2, newNumStates);
boolean[] newFinal = new boolean[newStatesLength];
Action[] newAction = new Action[newStatesLength];
StateSet[][] newTable = new StateSet[newStatesLength][numInput];
StateSet[] newEpsilon = new StateSet[newStatesLength];
System.arraycopy(isFinal, 0, newFinal, 0, numStates);
System.arraycopy(action, 0, newAction, 0, numStates);
System.arraycopy(epsilon, 0, newEpsilon, 0, numStates);
System.arraycopy(table, 0, newTable, 0, numStates);
isFinal = newFinal;
action = newAction;
epsilon = newEpsilon;
table = newTable;
}
public void addTransition(int start, int input, int dest) {
Out.debug("Adding transition (" + start + ", " + input + ", " + dest + ")");
int maxS = Math.max(start, dest) + 1;
ensureCapacity(maxS);
if (maxS > numStates) numStates = maxS;
if (table[start][input] != null) table[start][input].addState(dest);
else table[start][input] = new StateSet(estSize, dest);
}
public void addEpsilonTransition(int start, int dest) {
int max = Math.max(start, dest) + 1;
ensureCapacity(max);
if (max > numStates) numStates = max;
if (epsilon[start] != null) epsilon[start].addState(dest);
else epsilon[start] = new StateSet(estSize, dest);
}
public boolean containsFinal(StateSet set) {
states.reset(set);
while (states.hasMoreElements()) if (isFinal[states.nextElement()]) return true;
return false;
}
public Action getAction(StateSet set) {
states.reset(set);
Action maxAction = null;
Out.debug("Determining action of : " + set);
while (states.hasMoreElements()) {
Action currentAction = action[states.nextElement()];
if (currentAction != null) {
if (maxAction == null) maxAction = currentAction;
else maxAction = maxAction.getHigherPriority(currentAction);
}
}
return maxAction;
}
private StateSet closure(int startState) {
StateSet notvisited = tempStateSet;
StateSet closure = new StateSet(numStates, startState);
notvisited.clear();
notvisited.addState(startState);
while (notvisited.containsElements()) {
int state = notvisited.getAndRemoveElement();
notvisited.add(closure.complement(epsilon[state]));
closure.add(epsilon[state]);
}
return closure;
}
public void epsilonFill() {
for (int i = 0; i < numStates; i++) {
epsilon[i] = closure(i);
}
}
private StateSet DFAEdge(StateSet start, int input) {
tempStateSet.clear();
states.reset(start);
while (states.hasMoreElements()) tempStateSet.add(table[states.nextElement()][input]);
StateSet result = new StateSet(tempStateSet);
states.reset(tempStateSet);
while (states.hasMoreElements()) result.add(epsilon[states.nextElement()]);
return result;
}
public void dumpTable() {
Out.dump(toString());
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();
for (int i = 0; i < numStates; i++) {
result.append("State");
if (isFinal[i]) {
result.append("[FINAL");
String l = action[i].lookString();
if (!Objects.equals(l, "")) {
result.append(", ");
result.append(l);
}
result.append("]");
}
result.append(" ").append(i).append(Out.NL);
for (int input = 0; input < numInput; input++) {
if (table[i][input] != null && table[i][input].containsElements())
result
.append(" with ")
.append(input)
.append(" in ")
.append(table[i][input])
.append(Out.NL);
}
if (epsilon[i] != null && epsilon[i].containsElements())
result.append(" with epsilon in ").append(epsilon[i]).append(Out.NL);
}
return result.toString();
}
public void writeDot(File file) {
try {
PrintWriter writer =
new PrintWriter(
new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8));
writer.println(dotFormat());
writer.close();
} catch (IOException e) {
Out.error(ErrorMessages.FILE_WRITE, file);
throw new GeneratorException(e);
}
}
public String dotFormat() {
StringBuilder result = new StringBuilder();
result.append("digraph NFA {").append(Out.NL);
result.append("rankdir = LR").append(Out.NL);
for (int i = 0; i < numStates; i++) {
if (isFinal[i]) {
result.append(i);
result.append(" [shape = doublecircle]");
result.append(Out.NL);
}
}
for (int i = 0; i < numStates; i++) {
for (int input = 0; input < numInput; input++) {
for (int s : table[i][input]) {
result.append(i).append(" -> ").append(s);
result.append(" [label=\"").append(classes.toString(input)).append("\"]").append(Out.NL);
}
}
for (int s : epsilon[i]) {
result.append(i).append(" -> ").append(s).append(" [style=dotted]").append(Out.NL);
}
}
result.append("}").append(Out.NL);
return result.toString();
}
private void insertLetterNFA(boolean caseless, int ch, int start, int end) {
if (caseless) {
IntCharSet set = IntCharSet.ofCharacter(ch);
IntCharSet caselessSet = set.getCaseless(scanner.getUnicodeProperties());
for (Interval interval : caselessSet.getIntervals()) {
for (int elem = interval.start; elem <= interval.end; ++elem) {
addTransition(start, classes.getClassCode(elem), end);
}
}
} else {
addTransition(start, classes.getClassCode(ch), end);
}
}
private IntPair insertStringNFA(boolean caseless, String str) {
int start = numStates;
int i = 0;
for (int pos = 0; pos < str.length(); ++i) {
int ch = str.codePointAt(pos);
if (caseless) {
IntCharSet set = IntCharSet.ofCharacter(ch);
IntCharSet caselessSet = set.getCaseless(scanner.getUnicodeProperties());
for (Interval interval : caselessSet.getIntervals()) {
for (int elem = interval.start; elem <= interval.end; ++elem) {
addTransition(i + start, classes.getClassCode(elem), i + start + 1);
}
}
} else {
addTransition(i + start, classes.getClassCode(ch), i + start + 1);
}
pos += Character.charCount(ch);
}
return IntPair.create(start, i + start);
}
private void insertClassNFA(IntCharSet set, int start, int end) {
for (int aCl : classes.getClassCodes(set, false)) {
addTransition(start, aCl, end);
}
}
private IntPair complement(IntPair nfa) {
if (Build.DEBUG) {
Out.debug("complement for " + nfa);
Out.debug("NFA is :" + Out.NL + this);
}
int dfaStart = nfa.end() + 1;
epsilonFill();
Map<StateSet, Integer> dfaStates = new HashMap<>(numStates);
List<StateSet> dfaList = new ArrayList<>(numStates);
int numDFAStates = 0;
int currentDFAState = 0;
StateSet currentState, newState;
newState = epsilon[nfa.start()];
dfaStates.put(newState, numDFAStates);
dfaList.add(newState);
if (Build.DEBUG) {
Out.debug(
"pos DFA start state is :"
+ Out.NL
+ dfaStates
+ Out.NL
+ Out.NL
+ "ordered :"
+ Out.NL
+ dfaList);
}
while (currentDFAState <= numDFAStates) {
currentState = dfaList.get(currentDFAState);
for (int input = 0; input < numInput; input++) {
newState = DFAEdge(currentState, input);
if (newState.containsElements()) {
Integer nextDFAState = dfaStates.get(newState);
if (nextDFAState != null) {
addTransition(dfaStart + currentDFAState, input, dfaStart + nextDFAState);
} else {
if (Options.dump) {
Out.print("+");
}
numDFAStates++;
dfaStates.put(newState, numDFAStates);
dfaList.add(newState);
addTransition(dfaStart + currentDFAState, input, dfaStart + numDFAStates);
}
}
}
currentDFAState++;
}
if (Build.DEBUG) {
Out.debug("dfa finished, nfa is now :" + Out.NL + this);
}
int start = dfaStart + numDFAStates + 1;
int error = dfaStart + numDFAStates + 2;
int end = dfaStart + numDFAStates + 3;
addEpsilonTransition(start, dfaStart);
for (int i = 0; i < numInput; i++) addTransition(error, i, error);
addEpsilonTransition(error, end);
for (int s = 0; s <= numDFAStates; s++) {
currentState = dfaList.get(s);
currentDFAState = dfaStart + s;
if (!currentState.hasElement(nfa.end())) addEpsilonTransition(currentDFAState, end);
for (int i = 0; i < numInput; i++)
if (table[currentDFAState][i] == null) addTransition(currentDFAState, i, error);
}
removeDead(dfaStart, end);
if (Build.DEBUG) {
Out.debug("complement finished, nfa (" + start + "," + end + ") is now :" + this);
}
return IntPair.create(start, end);
}
private void removeDead(int start, int end) {
if (Build.DEBUG) {
Out.debug("removeDead (" + start + "," + end + ") " + Out.NL + this);
}
StateSet notvisited = tempStateSet;
StateSet reachable = new StateSet(numStates, start);
notvisited.clear();
notvisited.addState(start);
while (notvisited.containsElements()) {
int state = notvisited.getAndRemoveElement();
notvisited.add(reachable.complement(epsilon[state]));
reachable.add(epsilon[state]);
for (int i = 0; i < numInput; i++) {
notvisited.add(reachable.complement(table[state][i]));
reachable.add(table[state][i]);
}
}
if (Build.DEBUG) {
Out.debug("reachable states " + reachable);
}
StateSet live = new StateSet(numStates, end);
boolean changed = true;
while (changed) {
changed = false;
Out.debug("live: " + live);
for (int s : live.complement(reachable)) {
for (int i = 0; i < numInput; i++) {
if (table[s][i] != null) {
for (int state : table[s][i]) {
if (live.hasElement(state)) {
changed = true;
live.addState(s);
}
}
}
}
if (epsilon[s] != null) {
for (int state : epsilon[s]) {
if (live.hasElement(state)) {
changed = true;
live.addState(s);
}
}
}
}
}
if (Build.DEBUG) {
Out.debug("live states: " + live);
}
if (!reachable.equals(live)) {
for (int s : reachable) {
for (int i = 0; i < numInput; i++) if (table[s][i] != null) table[s][i].intersect(live);
if (epsilon[s] != null) epsilon[s].intersect(live);
}
}
if (Build.DEBUG) {
Out.debug("Removed dead states " + Out.NL + this);
}
}
private void insertCCLNFA(RegExp regExp, int start, int end) {
switch (regExp.type) {
case sym.BAR:
RegExp2 r = (RegExp2) regExp;
insertCCLNFA(r.r1, start, end);
insertCCLNFA(r.r2, start, end);
return;
case sym.PRIMCLASS:
insertClassNFA((IntCharSet) ((RegExp1) regExp).content, start, end);
return;
case sym.CHAR:
insertLetterNFA(false, (Integer) ((RegExp1) regExp).content, start, end);
return;
case sym.CHAR_I:
insertLetterNFA(true, (Integer) ((RegExp1) regExp).content, start, end);
return;
default:
throw new RegExpException(regExp);
}
}
public IntPair insertNFA(RegExp regExp) {
IntPair nfa1, nfa2;
int start, end;
RegExp2 r;
if (Build.DEBUG) {
Out.debug("Inserting RegExp : " + regExp);
}
if (regExp.isCharClass()) {
start = numStates;
end = numStates + 1;
ensureCapacity(end + 1);
numStates = end + 1;
insertCCLNFA(regExp, start, end);
return IntPair.create(start, end);
}
switch (regExp.type) {
case sym.BAR:
r = (RegExp2) regExp;
nfa1 = insertNFA(r.r1);
nfa2 = insertNFA(r.r2);
start = nfa2.end() + 1;
end = nfa2.end() + 2;
addEpsilonTransition(start, nfa1.start());
addEpsilonTransition(start, nfa2.start());
addEpsilonTransition(nfa1.end(), end);
addEpsilonTransition(nfa2.end(), end);
return IntPair.create(start, end);
case sym.CONCAT:
r = (RegExp2) regExp;
nfa1 = insertNFA(r.r1);
nfa2 = insertNFA(r.r2);
addEpsilonTransition(nfa1.end(), nfa2.start());
return IntPair.create(nfa1.start(), nfa2.end());
case sym.STAR:
nfa1 = insertNFA((RegExp) ((RegExp1) regExp).content);
start = nfa1.end() + 1;
end = nfa1.end() + 2;
addEpsilonTransition(nfa1.end(), end);
addEpsilonTransition(start, nfa1.start());
addEpsilonTransition(start, end);
addEpsilonTransition(nfa1.end(), nfa1.start());
return IntPair.create(start, end);
case sym.PLUS:
nfa1 = insertNFA((RegExp) ((RegExp1) regExp).content);
start = nfa1.end() + 1;
end = nfa1.end() + 2;
addEpsilonTransition(nfa1.end(), end);
addEpsilonTransition(start, nfa1.start());
addEpsilonTransition(nfa1.end(), nfa1.start());
return IntPair.create(start, end);
case sym.QUESTION:
nfa1 = insertNFA((RegExp) ((RegExp1) regExp).content);
addEpsilonTransition(nfa1.start(), nfa1.end());
return IntPair.create(nfa1.start(), nfa1.end());
case sym.BANG:
return complement(insertNFA((RegExp) ((RegExp1) regExp).content));
case sym.TILDE:
return insertNFA(regExp.resolveTilde());
case sym.STRING:
return insertStringNFA(false, (String) ((RegExp1) regExp).content);
case sym.STRING_I:
return insertStringNFA(true, (String) ((RegExp1) regExp).content);
default:
throw new RegExpException(regExp);
}
}
}