package org.apache.lucene.util.graph;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.FiniteStringsIterator;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
public final class GraphTokenStreamFiniteStrings {
private final Map<Integer, BytesRef> idToTerm = new HashMap<>();
private final Map<Integer, Integer> idToInc = new HashMap<>();
private final Automaton det;
private final Transition transition = new Transition();
private class FiniteStringsTokenStream extends TokenStream {
private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final IntsRef ids;
private final int end;
private int offset;
FiniteStringsTokenStream(final IntsRef ids) {
assert ids != null;
this.ids = ids;
this.offset = ids.offset;
this.end = ids.offset + ids.length;
}
@Override
public boolean incrementToken() throws IOException {
if (offset < end) {
clearAttributes();
int id = ids.ints[offset];
termAtt.setBytesRef(idToTerm.get(id));
int incr = 1;
if (idToInc.containsKey(id)) {
incr = idToInc.get(id);
}
posIncAtt.setPositionIncrement(incr);
offset++;
return true;
}
return false;
}
}
public GraphTokenStreamFiniteStrings(TokenStream in) throws IOException {
Automaton aut = build(in);
this.det = Operations.removeDeadStates(Operations.determinize(aut, DEFAULT_MAX_DETERMINIZED_STATES));
}
public boolean hasSidePath(int state) {
int numT = det.initTransition(state, transition);
if (numT <= 1) {
return false;
}
det.getNextTransition(transition);
int dest = transition.dest;
for (int i = 1; i < numT; i++) {
det.getNextTransition(transition);
if (dest != transition.dest) {
return true;
}
}
return false;
}
public Term[] getTerms(String field, int state) {
int numT = det.initTransition(state, transition);
List<Term> terms = new ArrayList<> ();
for (int i = 0; i < numT; i++) {
det.getNextTransition(transition);
for (int id = transition.min; id <= transition.max; id++) {
Term term = new Term(field, idToTerm.get(id));
terms.add(term);
}
}
return terms.toArray(new Term[terms.size()]);
}
public Iterator<TokenStream> getFiniteStrings() throws IOException {
return getFiniteStrings(0, -1);
}
public Iterator<TokenStream> getFiniteStrings(int startState, int endState) throws IOException {
final FiniteStringsIterator it = new FiniteStringsIterator(det, startState, endState);
return new Iterator<TokenStream> () {
IntsRef current;
boolean finished = false;
@Override
public boolean hasNext() {
if (finished == false && current == null) {
current = it.next();
if (current == null) {
finished = true;
}
}
return current != null;
}
@Override
public TokenStream next() {
if (current == null) {
hasNext();
}
TokenStream next = new FiniteStringsTokenStream(current);
current = null;
return next;
}
};
}
public int[] articulationPoints() {
if (det.getNumStates() == 0) {
return new int[0];
}
Automaton.Builder undirect = new Automaton.Builder();
undirect.copy(det);
for (int i = 0; i < det.getNumStates(); i++) {
int numT = det.initTransition(i, transition);
for (int j = 0; j < numT; j++) {
det.getNextTransition(transition);
undirect.addTransition(transition.dest, i, transition.min);
}
}
int numStates = det.getNumStates();
BitSet visited = new BitSet(numStates);
int[] depth = new int[det.getNumStates()];
int[] low = new int[det.getNumStates()];
int[] parent = new int[det.getNumStates()];
Arrays.fill(parent, -1);
List<Integer> points = new ArrayList<>();
articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points);
Collections.reverse(points);
return points.stream().mapToInt(p -> p).toArray();
}
private Automaton build(final TokenStream in) throws IOException {
Automaton.Builder builder = new Automaton.Builder();
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
in.reset();
int pos = -1;
int prevIncr = 1;
int state = -1;
int gap = 0;
while (in.incrementToken()) {
int currentIncr = posIncAtt.getPositionIncrement();
if (pos == -1 && currentIncr < 1) {
throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
}
if (currentIncr == 0) {
if (gap > 0) {
pos -= gap;
}
}
else {
pos++;
gap = currentIncr - 1;
}
int endPos = pos + posLengthAtt.getPositionLength() + gap;
while (state < endPos) {
state = builder.createState();
}
BytesRef term = termBytesAtt.getBytesRef();
int id = getTermID(currentIncr, prevIncr, term);
builder.addTransition(pos, endPos, id);
pos += gap;
if (currentIncr > 0) {
prevIncr = currentIncr;
}
}
in.end();
if (state != -1) {
builder.setAccept(state, true);
}
return builder.finish();
}
private int getTermID(int incr, int prevIncr, BytesRef term) {
assert term != null;
boolean isStackedGap = incr == 0 && prevIncr > 1;
int id = idToTerm.size();
idToTerm.put(id, BytesRef.deepCopyOf(term));
if (isStackedGap) {
idToInc.put(id, prevIncr);
} else if (incr > 1) {
idToInc.put(id, incr);
}
return id;
}
private static void articulationPointsRecurse(Automaton a, int state, int d, int[] depth, int[] low, int[] parent,
BitSet visited, List<Integer> points) {
visited.set(state);
depth[state] = d;
low[state] = d;
int childCount = 0;
boolean isArticulation = false;
Transition t = new Transition();
int numT = a.initTransition(state, t);
for (int i = 0; i < numT; i++) {
a.getNextTransition(t);
if (visited.get(t.dest) == false) {
parent[t.dest] = state;
articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points);
childCount++;
if (low[t.dest] >= depth[state]) {
isArticulation = true;
}
low[state] = Math.min(low[state], low[t.dest]);
} else if (t.dest != parent[state]) {
low[state] = Math.min(low[state], depth[t.dest]);
}
}
if ((parent[state] != -1 && isArticulation) || (parent[state] == -1 && childCount > 1)) {
points.add(state);
}
}
}