/*
 * dk.brics.automaton
 * 
 * Copyright (c) 2001-2009 Anders Moeller
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.apache.lucene.util.automaton;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.LinkedList;

Operations for minimizing automata.
@lucene.experimental
/** * Operations for minimizing automata. * * @lucene.experimental */
final public class MinimizationOperations { private MinimizationOperations() {}
Minimizes (and determinizes if not already deterministic) the given automaton using Hopcroft's algorithm.
Params:
  • maxDeterminizedStates – maximum number of states determinizing the automaton can result in. Set higher to allow more complex queries and lower to prevent memory exhaustion.
/** * Minimizes (and determinizes if not already deterministic) the given * automaton using Hopcroft's algorithm. * @param maxDeterminizedStates maximum number of states determinizing the * automaton can result in. Set higher to allow more complex queries and * lower to prevent memory exhaustion. */
public static Automaton minimize(Automaton a, int maxDeterminizedStates) { if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) { // Fastmatch for common case return new Automaton(); } a = Operations.determinize(a, maxDeterminizedStates); //a.writeDot("adet"); if (a.getNumTransitions(0) == 1) { Transition t = new Transition(); a.getTransition(0, 0, t); if (t.dest == 0 && t.min == Character.MIN_CODE_POINT && t.max == Character.MAX_CODE_POINT) { // Accepts all strings return a; } } a = Operations.totalize(a); //a.writeDot("atot"); // initialize data structures final int[] sigma = a.getStartPoints(); final int sigmaLen = sigma.length, statesLen = a.getNumStates(); @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[][] reverse = (ArrayList<Integer>[][]) new ArrayList[statesLen][sigmaLen]; @SuppressWarnings({"rawtypes","unchecked"}) final HashSet<Integer>[] partition = (HashSet<Integer>[]) new HashSet[statesLen]; @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[] splitblock = (ArrayList<Integer>[]) new ArrayList[statesLen]; final int[] block = new int[statesLen]; final StateList[][] active = new StateList[statesLen][sigmaLen]; final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen]; final LinkedList<IntPair> pending = new LinkedList<>(); final BitSet pending2 = new BitSet(sigmaLen*statesLen); final BitSet split = new BitSet(statesLen), refine = new BitSet(statesLen), refine2 = new BitSet(statesLen); for (int q = 0; q < statesLen; q++) { splitblock[q] = new ArrayList<>(); partition[q] = new HashSet<>(); for (int x = 0; x < sigmaLen; x++) { active[q][x] = new StateList(); } } // find initial partition and reverse edges Transition transition = new Transition(); for (int q = 0; q < statesLen; q++) { final int j = a.isAccept(q) ? 0 : 1; partition[j].add(q); block[q] = j; transition.source = q; transition.transitionUpto = -1; for (int x = 0; x < sigmaLen; x++) { final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])]; if (r[x] == null) { r[x] = new ArrayList<>(); } r[x].add(q); } } // initialize active sets for (int j = 0; j <= 1; j++) { for (int x = 0; x < sigmaLen; x++) { for (int q : partition[j]) { if (reverse[q][x] != null) { active2[q][x] = active[j][x].add(q); } } } } // initialize pending for (int x = 0; x < sigmaLen; x++) { final int j = (active[0][x].size <= active[1][x].size) ? 0 : 1; pending.add(new IntPair(j, x)); pending2.set(x*statesLen + j); } // process pending until fixed point int k = 2; //System.out.println("start min"); while (!pending.isEmpty()) { //System.out.println(" cycle pending"); final IntPair ip = pending.removeFirst(); final int p = ip.n1; final int x = ip.n2; //System.out.println(" pop n1=" + ip.n1 + " n2=" + ip.n2); pending2.clear(x*statesLen + p); // find states that need to be split off their blocks for (StateListNode m = active[p][x].first; m != null; m = m.next) { final ArrayList<Integer> r = reverse[m.q][x]; if (r != null) { for (int i : r) { if (!split.get(i)) { split.set(i); final int j = block[i]; splitblock[j].add(i); if (!refine2.get(j)) { refine2.set(j); refine.set(j); } } } } } // refine blocks for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) { final ArrayList<Integer> sb = splitblock[j]; if (sb.size() < partition[j].size()) { final HashSet<Integer> b1 = partition[j]; final HashSet<Integer> b2 = partition[k]; for (int s : sb) { b1.remove(s); b2.add(s); block[s] = k; for (int c = 0; c < sigmaLen; c++) { final StateListNode sn = active2[s][c]; if (sn != null && sn.sl == active[j][c]) { sn.remove(); active2[s][c] = active[k][c].add(s); } } } // update pending for (int c = 0; c < sigmaLen; c++) { final int aj = active[j][c].size, ak = active[k][c].size, ofs = c*statesLen; if (!pending2.get(ofs + j) && 0 < aj && aj <= ak) { pending2.set(ofs + j); pending.add(new IntPair(j, c)); } else { pending2.set(ofs + k); pending.add(new IntPair(k, c)); } } k++; } refine2.clear(j); for (int s : sb) { split.clear(s); } sb.clear(); } refine.clear(); } Automaton result = new Automaton(); Transition t = new Transition(); //System.out.println(" k=" + k); // make a new state for each equivalence class, set initial state int[] stateMap = new int[statesLen]; int[] stateRep = new int[k]; result.createState(); //System.out.println("min: k=" + k); for (int n = 0; n < k; n++) { //System.out.println(" n=" + n); boolean isInitial = false; for (int q : partition[n]) { if (q == 0) { isInitial = true; //System.out.println(" isInitial!"); break; } } int newState; if (isInitial) { newState = 0; } else { newState = result.createState(); } //System.out.println(" newState=" + newState); for (int q : partition[n]) { stateMap[q] = newState; //System.out.println(" q=" + q + " isAccept?=" + a.isAccept(q)); result.setAccept(newState, a.isAccept(q)); stateRep[newState] = q; // select representative } } // build transitions and set acceptance for (int n = 0; n < k; n++) { int numTransitions = a.initTransition(stateRep[n], t); for(int i=0;i<numTransitions;i++) { a.getNextTransition(t); //System.out.println(" add trans"); result.addTransition(n, stateMap[t.dest], t.min, t.max); } } result.finishState(); //System.out.println(result.getNumStates() + " states"); return Operations.removeDeadStates(result); } static final class IntPair { final int n1, n2; IntPair(int n1, int n2) { this.n1 = n1; this.n2 = n2; } } static final class StateList { int size; StateListNode first, last; StateListNode add(int q) { return new StateListNode(q, this); } } static final class StateListNode { final int q; StateListNode next, prev; final StateList sl; StateListNode(int q, StateList sl) { this.q = q; this.sl = sl; if (sl.size++ == 0) sl.first = sl.last = this; else { sl.last.next = this; prev = sl.last; sl.last = this; } } void remove() { sl.size--; if (sl.first == this) sl.first = next; else prev.next = next; if (sl.last == this) sl.last = prev; else next.prev = prev; } } }