/*
* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sun.org.apache.xerces.internal.impl.xs.models;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;
import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
import com.sun.org.apache.xerces.internal.xni.QName;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
DFAContentModel is the implementation of XSCMValidator that does
all of the non-trivial element content validation. This class does
the conversion from the regular expression to the DFA that
it then uses in its validation algorithm.
Author: Neil Graham, IBM @xerces.internal @LastModified : Oct 2017
/**
* DFAContentModel is the implementation of XSCMValidator that does
* all of the non-trivial element content validation. This class does
* the conversion from the regular expression to the DFA that
* it then uses in its validation algorithm.
*
* @xerces.internal
*
* @author Neil Graham, IBM
* @LastModified: Oct 2017
*/
public class XSDFACM
implements XSCMValidator {
//
// Constants
//
private static final boolean DEBUG = false;
// special strings
// debugging
Set to true to debug content model validation. /** Set to true to debug content model validation. */
private static final boolean DEBUG_VALIDATE_CONTENT = false;
//
// Data
//
This is the map of unique input symbol elements to indices into
each state's per-input symbol transition table entry. This is part
of the built DFA information that must be kept around to do the
actual validation. Note tat since either XSElementDecl or XSParticleDecl object
can live here, we've got to use an Object.
/**
* This is the map of unique input symbol elements to indices into
* each state's per-input symbol transition table entry. This is part
* of the built DFA information that must be kept around to do the
* actual validation. Note tat since either XSElementDecl or XSParticleDecl object
* can live here, we've got to use an Object.
*/
private Object fElemMap[] = null;
This is a map of whether the element map contains information
related to ANY models.
/**
* This is a map of whether the element map contains information
* related to ANY models.
*/
private int fElemMapType[] = null;
id of the unique input symbol
/**
* id of the unique input symbol
*/
private int fElemMapId[] = null;
The element map size. /** The element map size. */
private int fElemMapSize = 0;
This is an array of booleans, one per state (there are
fTransTableSize states in the DFA) that indicates whether that
state is a final state.
/**
* This is an array of booleans, one per state (there are
* fTransTableSize states in the DFA) that indicates whether that
* state is a final state.
*/
private boolean fFinalStateFlags[] = null;
The list of follow positions for each NFA position (i.e. for each
non-epsilon leaf node.) This is only used during the building of
the DFA, and is let go afterwards.
/**
* The list of follow positions for each NFA position (i.e. for each
* non-epsilon leaf node.) This is only used during the building of
* the DFA, and is let go afterwards.
*/
private CMStateSet fFollowList[] = null;
This is the head node of our intermediate representation. It is
only non-null during the building of the DFA (just so that it
does not have to be passed all around.) Once the DFA is built,
this is no longer required so its nulled out.
/**
* This is the head node of our intermediate representation. It is
* only non-null during the building of the DFA (just so that it
* does not have to be passed all around.) Once the DFA is built,
* this is no longer required so its nulled out.
*/
private CMNode fHeadNode = null;
The count of leaf nodes. This is an important number that set some
limits on the sizes of data structures in the DFA process.
/**
* The count of leaf nodes. This is an important number that set some
* limits on the sizes of data structures in the DFA process.
*/
private int fLeafCount = 0;
An array of non-epsilon leaf nodes, which is used during the DFA
build operation, then dropped.
/**
* An array of non-epsilon leaf nodes, which is used during the DFA
* build operation, then dropped.
*/
private XSCMLeaf fLeafList[] = null;
Array mapping ANY types to the leaf list. /** Array mapping ANY types to the leaf list. */
private int fLeafListType[] = null;
This is the transition table that is the main by product of all
of the effort here. It is an array of arrays of ints. The first
dimension is the number of states we end up with in the DFA. The
second dimensions is the number of unique elements in the content
model (fElemMapSize). Each entry in the second dimension indicates
the new state given that input for the first dimension's start
state.
The fElemMap array handles mapping from element indexes to
positions in the second dimension of the transition table.
/**
* This is the transition table that is the main by product of all
* of the effort here. It is an array of arrays of ints. The first
* dimension is the number of states we end up with in the DFA. The
* second dimensions is the number of unique elements in the content
* model (fElemMapSize). Each entry in the second dimension indicates
* the new state given that input for the first dimension's start
* state.
* <p>
* The fElemMap array handles mapping from element indexes to
* positions in the second dimension of the transition table.
*/
private int fTransTable[][] = null;
Array containing occurence information for looping states
which use counters to check minOccurs/maxOccurs.
/**
* Array containing occurence information for looping states
* which use counters to check minOccurs/maxOccurs.
*/
private Occurence [] fCountingStates = null;
static final class Occurence {
final int minOccurs;
final int maxOccurs;
final int elemIndex;
public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) {
minOccurs = leaf.getMinOccurs();
maxOccurs = leaf.getMaxOccurs();
this.elemIndex = elemIndex;
}
public String toString() {
return "minOccurs=" + minOccurs
+ ";maxOccurs=" +
((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED)
? Integer.toString(maxOccurs) : "unbounded");
}
}
The number of valid entries in the transition table, and in the other
related tables such as fFinalStateFlags.
/**
* The number of valid entries in the transition table, and in the other
* related tables such as fFinalStateFlags.
*/
private int fTransTableSize = 0;
private boolean fIsCompactedForUPA;
Array of counters for all the for elements (or wildcards)
of the form a{n,m} where n > 1 and m <= unbounded. Used
to count the a's to later check against n and m. Counter
set to -1 if element (or wildcard) not optimized by
constant space algorithm.
/**
* Array of counters for all the for elements (or wildcards)
* of the form a{n,m} where n > 1 and m <= unbounded. Used
* to count the a's to later check against n and m. Counter
* set to -1 if element (or wildcard) not optimized by
* constant space algorithm.
*/
private int fElemMapCounter[];
Array of lower bounds for all the for elements (or wildcards)
of the form a{n,m} where n > 1 and m <= unbounded. This array
stores the n's for those elements (or wildcards) for which
the constant space algorithm applies (or -1 otherwise).
/**
* Array of lower bounds for all the for elements (or wildcards)
* of the form a{n,m} where n > 1 and m <= unbounded. This array
* stores the n's for those elements (or wildcards) for which
* the constant space algorithm applies (or -1 otherwise).
*/
private int fElemMapCounterLowerBound[];
Array of upper bounds for all the for elements (or wildcards)
of the form a{n,m} where n > 1 and m <= unbounded. This array
stores the n's for those elements (or wildcards) for which
the constant space algorithm applies, or -1 if algorithm does
not apply or m = unbounded.
/**
* Array of upper bounds for all the for elements (or wildcards)
* of the form a{n,m} where n > 1 and m <= unbounded. This array
* stores the n's for those elements (or wildcards) for which
* the constant space algorithm applies, or -1 if algorithm does
* not apply or m = unbounded.
*/
private int fElemMapCounterUpperBound[]; // -1 if no upper bound
// temp variables
//
// Constructors
//
Constructs a DFA content model.
Params: - syntaxTree – The syntax tree of the content model.
- leafCount – The number of leaves.
Throws: - RuntimeException – Thrown if DFA can't be built.
/**
* Constructs a DFA content model.
*
* @param syntaxTree The syntax tree of the content model.
* @param leafCount The number of leaves.
*
* @exception RuntimeException Thrown if DFA can't be built.
*/
public XSDFACM(CMNode syntaxTree, int leafCount) {
// Store away our index and pools in members
fLeafCount = leafCount;
fIsCompactedForUPA = syntaxTree.isCompactedForUPA();
//
// Create some string pool indexes that represent the names of some
// magical nodes in the syntax tree.
// (already done in static initialization...
//
//
// Ok, so lets grind through the building of the DFA. This method
// handles the high level logic of the algorithm, but it uses a
// number of helper classes to do its thing.
//
// In order to avoid having hundreds of references to the error and
// string handlers around, this guy and all of his helper classes
// just throw a simple exception and we then pass it along.
//
if(DEBUG_VALIDATE_CONTENT) {
XSDFACM.time -= System.currentTimeMillis();
}
buildDFA(syntaxTree);
if(DEBUG_VALIDATE_CONTENT) {
XSDFACM.time += System.currentTimeMillis();
System.out.println("DFA build: " + XSDFACM.time + "ms");
}
}
private static long time = 0;
//
// XSCMValidator methods
//
check whether the given state is one of the final states
Params: - state – the state to check
Returns: whether it's a final state
/**
* check whether the given state is one of the final states
*
* @param state the state to check
*
* @return whether it's a final state
*/
public boolean isFinalState (int state) {
return (state < 0)? false :
fFinalStateFlags[state];
}
one transition only
Params: - curElem – The current element's QName
- state – stack to store the previous state
- subGroupHandler – the substitution group handler
Throws: - RuntimeException – thrown on error
Returns: null if transition is invalid; otherwise the Object corresponding to the
XSElementDecl or XSWildcardDecl identified. Also, the
state array will be modified to include the new state; this so that the validator can
store it away.
/**
* one transition only
*
* @param curElem The current element's QName
* @param state stack to store the previous state
* @param subGroupHandler the substitution group handler
*
* @return null if transition is invalid; otherwise the Object corresponding to the
* XSElementDecl or XSWildcardDecl identified. Also, the
* state array will be modified to include the new state; this so that the validator can
* store it away.
*
* @exception RuntimeException thrown on error
*/
public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) {
int curState = state[0];
if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
// there was an error last time; so just go find correct Object in fElemmMap.
// ... after resetting state[0].
if(curState == XSCMValidator.FIRST_ERROR)
state[0] = XSCMValidator.SUBSEQUENT_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
int nextState = 0;
int elemIndex = 0;
Object matchingDecl = null;
for (; elemIndex < fElemMapSize; elemIndex++) {
nextState = fTransTable[curState][elemIndex];
if (nextState == -1)
continue;
int type = fElemMapType[elemIndex] ;
if (type == XSParticleDecl.PARTICLE_ELEMENT) {
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
if (matchingDecl != null) {
// Increment counter if constant space algorithm applies
if (fElemMapCounter[elemIndex] >= 0) {
fElemMapCounter[elemIndex]++;
}
break;
}
}
else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
matchingDecl = fElemMap[elemIndex];
// Increment counter if constant space algorithm applies
if (fElemMapCounter[elemIndex] >= 0) {
fElemMapCounter[elemIndex]++;
}
break;
}
}
}
// if we still can't find a match, set the state to first_error
// and return null
if (elemIndex == fElemMapSize) {
state[1] = state[0];
state[0] = XSCMValidator.FIRST_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
if (fCountingStates != null) {
Occurence o = fCountingStates[curState];
if (o != null) {
if (curState == nextState) {
if (++state[2] > o.maxOccurs &&
o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
// It's likely that we looped too many times on the current state
// however it's possible that we actually matched another particle
// which allows the same name.
//
// Consider:
//
// <xs:sequence>
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
// <xs:element name="foo" type="xs:string" fixed="bar"/>
// </xs:sequence>
//
// and
//
// <xs:sequence>
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
// <xs:any namespace="##any" processContents="skip"/>
// </xs:sequence>
//
// In the DFA there will be two transitions from the current state which
// allow "foo". Note that this is not a UPA violation. The ambiguity of which
// transition to take is resolved by the current value of the counter. Since
// we've already seen enough instances of the first "foo" perhaps there is
// another element declaration or wildcard deeper in the element map which
// matches.
return findMatchingDecl(curElem, state, subGroupHandler, elemIndex);
}
}
else if (state[2] < o.minOccurs) {
// not enough loops on the current state.
state[1] = state[0];
state[0] = XSCMValidator.FIRST_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
else {
// Exiting a counting state. If we're entering a new
// counting state, reset the counter.
o = fCountingStates[nextState];
if (o != null) {
state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
}
}
}
else {
o = fCountingStates[nextState];
if (o != null) {
// Entering a new counting state. Reset the counter.
// If we've already seen one instance of the looping
// particle set the counter to 1, otherwise set it
// to 0.
state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
}
}
}
state[0] = nextState;
return matchingDecl;
} // oneTransition(QName, int[], SubstitutionGroupHandler): Object
Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
Object matchingDecl = null;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
int type = fElemMapType[elemIndex] ;
if (type == XSParticleDecl.PARTICLE_ELEMENT) {
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
if (matchingDecl != null) {
return matchingDecl;
}
}
else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri))
return fElemMap[elemIndex];
}
}
return null;
} // findMatchingDecl(QName, SubstitutionGroupHandler): Object
Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) {
int curState = state[0];
int nextState = 0;
Object matchingDecl = null;
while (++elemIndex < fElemMapSize) {
nextState = fTransTable[curState][elemIndex];
if (nextState == -1)
continue;
int type = fElemMapType[elemIndex] ;
if (type == XSParticleDecl.PARTICLE_ELEMENT) {
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
if (matchingDecl != null) {
break;
}
}
else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
matchingDecl = fElemMap[elemIndex];
break;
}
}
}
// if we still can't find a match, set the state to FIRST_ERROR and return null
if (elemIndex == fElemMapSize) {
state[1] = state[0];
state[0] = XSCMValidator.FIRST_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
// if we found a match, set the next state and reset the
// counter if the next state is a counting state.
state[0] = nextState;
final Occurence o = fCountingStates[nextState];
if (o != null) {
state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
}
return matchingDecl;
} // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object
// This method returns the start states of the content model.
public int[] startContentModel() {
// Clear all constant space algorithm counters in use
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
if (fElemMapCounter[elemIndex] != -1) {
fElemMapCounter[elemIndex] = 0;
}
}
// [0] : the current state
// [1] : if [0] is an error state then the
// last valid state before the error
// [2] : occurence counter for counting states
return new int [3];
} // startContentModel():int[]
// this method returns whether the last state was a valid final state
public boolean endContentModel(int[] state) {
final int curState = state[0];
if (fFinalStateFlags[curState]) {
if (fCountingStates != null) {
Occurence o = fCountingStates[curState];
if (o != null && state[2] < o.minOccurs) {
// not enough loops on the current state to be considered final.
return false;
}
}
return true;
}
return false;
} // endContentModel(int[]): boolean
// Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
// but we can put it back later.
//
// Private methods
//
Builds the internal DFA transition table from the given syntax tree.
Params: - syntaxTree – The syntax tree.
Throws: - RuntimeException – Thrown if DFA cannot be built.
/**
* Builds the internal DFA transition table from the given syntax tree.
*
* @param syntaxTree The syntax tree.
*
* @exception RuntimeException Thrown if DFA cannot be built.
*/
private void buildDFA(CMNode syntaxTree) {
//
// The first step we need to take is to rewrite the content model
// using our CMNode objects, and in the process get rid of any
// repetition short cuts, converting them into '*' style repetitions
// or getting rid of repetitions altogether.
//
// The conversions done are:
//
// x+ -> (x|x*)
// x? -> (x|epsilon)
//
// This is a relatively complex scenario. What is happening is that
// we create a top level binary node of which the special EOC value
// is set as the right side node. The the left side is set to the
// rewritten syntax tree. The source is the original content model
// info from the decl pool. The rewrite is done by buildSyntaxTree()
// which recurses the decl pool's content of the element and builds
// a new tree in the process.
//
// Note that, during this operation, we set each non-epsilon leaf
// node's DFA state position and count the number of such leafs, which
// is left in the fLeafCount member.
//
// The nodeTmp object is passed in just as a temp node to use during
// the recursion. Otherwise, we'd have to create a new node on every
// level of recursion, which would be piggy in Java (as is everything
// for that matter.)
//
/* MODIFIED (Jan, 2001)
*
* Use following rules.
* nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
* nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
*
* The same computation of follow as x* is applied to x+
*
* The modification drastically reduces computation time of
* "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
*/
//
// And handle specially the EOC node, which also must be numbered
// and counted as a non-epsilon leaf node. It could not be handled
// in the above tree build because it was created before all that
// started. We save the EOC position since its used during the DFA
// building loop.
//
int EOCPos = fLeafCount;
XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
fHeadNode = new XSCMBinOp(
XSModelGroupImpl.MODELGROUP_SEQUENCE,
syntaxTree,
nodeEOC
);
//
// Ok, so now we have to iterate the new tree and do a little more
// work now that we know the leaf count. One thing we need to do is
// to calculate the first and last position sets of each node. This
// is cached away in each of the nodes.
//
// Along the way we also set the leaf count in each node as the
// maximum state count. They must know this in order to create their
// first/last pos sets.
//
// We also need to build an array of references to the non-epsilon
// leaf nodes. Since we iterate it in the same way as before, this
// will put them in the array according to their position values.
//
fLeafList = new XSCMLeaf[fLeafCount];
fLeafListType = new int[fLeafCount];
postTreeBuildInit(fHeadNode);
//
// And, moving onward... We now need to build the follow position
// sets for all the nodes. So we allocate an array of state sets,
// one for each leaf node (i.e. each DFA position.)
//
fFollowList = new CMStateSet[fLeafCount];
for (int index = 0; index < fLeafCount; index++)
fFollowList[index] = new CMStateSet(fLeafCount);
calcFollowList(fHeadNode);
//
// And finally the big push... Now we build the DFA using all the
// states and the tree we've built up. First we set up the various
// data structures we are going to use while we do this.
//
// First of all we need an array of unique element names in our
// content model. For each transition table entry, we need a set of
// contiguous indices to represent the transitions for a particular
// input element. So we need to a zero based range of indexes that
// map to element types. This element map provides that mapping.
//
fElemMap = new Object[fLeafCount];
fElemMapType = new int[fLeafCount];
fElemMapId = new int[fLeafCount];
fElemMapCounter = new int[fLeafCount];
fElemMapCounterLowerBound = new int[fLeafCount];
fElemMapCounterUpperBound = new int[fLeafCount];
fElemMapSize = 0;
Occurence [] elemOccurenceMap = null;
for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
// optimization from Henry Zongaro:
//fElemMap[outIndex] = new Object ();
fElemMap[outIndex] = null;
int inIndex = 0;
final int id = fLeafList[outIndex].getParticleId();
for (; inIndex < fElemMapSize; inIndex++) {
if (id == fElemMapId[inIndex])
break;
}
// If it was not in the list, then add it, if not the EOC node
if (inIndex == fElemMapSize) {
XSCMLeaf leaf = fLeafList[outIndex];
fElemMap[fElemMapSize] = leaf.getLeaf();
if (leaf instanceof XSCMRepeatingLeaf) {
if (elemOccurenceMap == null) {
elemOccurenceMap = new Occurence[fLeafCount];
}
elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
}
fElemMapType[fElemMapSize] = fLeafListType[outIndex];
fElemMapId[fElemMapSize] = id;
// Init counters and bounds for a{n,m} algorithm
int[] bounds = (int[]) leaf.getUserData();
if (bounds != null) {
fElemMapCounter[fElemMapSize] = 0;
fElemMapCounterLowerBound[fElemMapSize] = bounds[0];
fElemMapCounterUpperBound[fElemMapSize] = bounds[1];
} else {
fElemMapCounter[fElemMapSize] = -1;
fElemMapCounterLowerBound[fElemMapSize] = -1;
fElemMapCounterUpperBound[fElemMapSize] = -1;
}
fElemMapSize++;
}
}
// the last entry in the element map must be the EOC element.
// remove it from the map.
if (DEBUG) {
if (fElemMapId[fElemMapSize-1] != -1)
System.err.println("interal error in DFA: last element is not EOC.");
}
fElemMapSize--;
/***
* Optimization(Jan, 2001); We sort fLeafList according to
* elemIndex which is *uniquely* associated to each leaf.
* We are *assuming* that each element appears in at least one leaf.
**/
int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
int fSortCount = 0;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
final int id = fElemMapId[elemIndex];
for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
if (id == fLeafList[leafIndex].getParticleId())
fLeafSorter[fSortCount++] = leafIndex;
}
fLeafSorter[fSortCount++] = -1;
}
/* Optimization(Jan, 2001) */
//
// Next lets create some arrays, some that hold transient
// information during the DFA build and some that are permament.
// These are kind of sticky since we cannot know how big they will
// get, but we don't want to use any Java collections because of
// performance.
//
// Basically they will probably be about fLeafCount*2 on average,
// but can be as large as 2^(fLeafCount*2), worst case. So we start
// with fLeafCount*4 as a middle ground. This will be very unlikely
// to ever have to expand, though it if does, the overhead will be
// somewhat ugly.
//
int curArraySize = fLeafCount * 4;
CMStateSet[] statesToDo = new CMStateSet[curArraySize];
fFinalStateFlags = new boolean[curArraySize];
fTransTable = new int[curArraySize][];
//
// Ok we start with the initial set as the first pos set of the
// head node (which is the seq node that holds the content model
// and the EOC node.)
//
CMStateSet setT = fHeadNode.firstPos();
//
// Init our two state flags. Basically the unmarked state counter
// is always chasing the current state counter. When it catches up,
// that means we made a pass through that did not add any new states
// to the lists, at which time we are done. We could have used a
// expanding array of flags which we used to mark off states as we
// complete them, but this is easier though less readable maybe.
//
int unmarkedState = 0;
int curState = 0;
//
// Init the first transition table entry, and put the initial state
// into the states to do list, then bump the current state.
//
fTransTable[curState] = makeDefStateList();
statesToDo[curState] = setT;
curState++;
/* Optimization(Jan, 2001); This is faster for
* a large content model such as, "(t001+|t002+|.... |t500+)".
*/
Map<CMStateSet, Integer> stateTable = new HashMap<>();
/* Optimization(Jan, 2001) */
//
// Ok, almost done with the algorithm... We now enter the
// loop where we go until the states done counter catches up with
// the states to do counter.
//
while (unmarkedState < curState) {
//
// Get the first unmarked state out of the list of states to do.
// And get the associated transition table entry.
//
setT = statesToDo[unmarkedState];
int[] transEntry = fTransTable[unmarkedState];
// Mark this one final if it contains the EOC state
fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
// Bump up the unmarked state count, marking this state done
unmarkedState++;
// Loop through each possible input symbol in the element map
CMStateSet newSet = null;
/* Optimization(Jan, 2001) */
int sorterIndex = 0;
/* Optimization(Jan, 2001) */
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
//
// Build up a set of states which is the union of all of
// the follow sets of DFA positions that are in the current
// state. If we gave away the new set last time through then
// create a new one. Otherwise, zero out the existing one.
//
if (newSet == null)
newSet = new CMStateSet(fLeafCount);
else
newSet.zeroBits();
/* Optimization(Jan, 2001) */
int leafIndex = fLeafSorter[sorterIndex++];
while (leafIndex != -1) {
// If this leaf index (DFA position) is in the current set...
if (setT.getBit(leafIndex)) {
//
// If this leaf is the current input symbol, then we
// want to add its follow list to the set of states to
// transition to from the current state.
//
newSet.union(fFollowList[leafIndex]);
}
leafIndex = fLeafSorter[sorterIndex++];
}
/* Optimization(Jan, 2001) */
//
// If this new set is not empty, then see if its in the list
// of states to do. If not, then add it.
//
if (!newSet.isEmpty()) {
//
// Search the 'states to do' list to see if this new
// state set is already in there.
//
/* Optimization(Jan, 2001) */
Integer stateObj = stateTable.get(newSet);
int stateIndex = (stateObj == null ? curState : stateObj);
/* Optimization(Jan, 2001) */
// If we did not find it, then add it
if (stateIndex == curState) {
//
// Put this new state into the states to do and init
// a new entry at the same index in the transition
// table.
//
statesToDo[curState] = newSet;
fTransTable[curState] = makeDefStateList();
/* Optimization(Jan, 2001) */
stateTable.put(newSet, curState);
/* Optimization(Jan, 2001) */
// We now have a new state to do so bump the count
curState++;
//
// Null out the new set to indicate we adopted it.
// This will cause the creation of a new set on the
// next time around the loop.
//
newSet = null;
}
//
// Now set this state in the transition table's entry
// for this element (using its index), with the DFA
// state we will move to from the current state when we
// see this input element.
//
transEntry[elemIndex] = stateIndex;
// Expand the arrays if we're full
if (curState == curArraySize) {
//
// Yikes, we overflowed the initial array size, so
// we've got to expand all of these arrays. So adjust
// up the size by 50% and allocate new arrays.
//
final int newSize = (int)(curArraySize * 1.5);
CMStateSet[] newToDo = new CMStateSet[newSize];
boolean[] newFinalFlags = new boolean[newSize];
int[][] newTransTable = new int[newSize][];
// Copy over all of the existing content
System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
// Store the new array size
curArraySize = newSize;
statesToDo = newToDo;
fFinalStateFlags = newFinalFlags;
fTransTable = newTransTable;
}
}
}
}
//
// Fill in the occurence information for each looping state
// if we're using counters.
//
if (elemOccurenceMap != null) {
fCountingStates = new Occurence[curState];
for (int i = 0; i < curState; ++i) {
int [] transitions = fTransTable[i];
for (int j = 0; j < transitions.length; ++j) {
if (i == transitions[j]) {
fCountingStates[i] = elemOccurenceMap[j];
break;
}
}
}
}
//
// And now we can say bye bye to the temp representation since we've
// built the DFA.
//
if (DEBUG_VALIDATE_CONTENT)
dumpTree(fHeadNode, 0);
fHeadNode = null;
fLeafList = null;
fFollowList = null;
fLeafListType = null;
fElemMapId = null;
}
Calculates the follow list of the current node.
Params: - nodeCur – The curent node.
Throws: - RuntimeException – Thrown if follow list cannot be calculated.
/**
* Calculates the follow list of the current node.
*
* @param nodeCur The curent node.
*
* @exception RuntimeException Thrown if follow list cannot be calculated.
*/
private void calcFollowList(CMNode nodeCur) {
// Recurse as required
if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
// Recurse only
calcFollowList(((XSCMBinOp)nodeCur).getLeft());
calcFollowList(((XSCMBinOp)nodeCur).getRight());
}
else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
// Recurse first
calcFollowList(((XSCMBinOp)nodeCur).getLeft());
calcFollowList(((XSCMBinOp)nodeCur).getRight());
//
// Now handle our level. We use our left child's last pos
// set and our right child's first pos set, so go ahead and
// get them ahead of time.
//
final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos();
final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos();
//
// Now, for every position which is in our left child's last set
// add all of the states in our right child's first set to the
// follow set for that position.
//
for (int index = 0; index < fLeafCount; index++) {
if (last.getBit(index))
fFollowList[index].union(first);
}
}
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
|| nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
// Recurse first
calcFollowList(((XSCMUniOp)nodeCur).getChild());
//
// Now handle our level. We use our own first and last position
// sets, so get them up front.
//
final CMStateSet first = nodeCur.firstPos();
final CMStateSet last = nodeCur.lastPos();
//
// For every position which is in our last position set, add all
// of our first position states to the follow set for that
// position.
//
for (int index = 0; index < fLeafCount; index++) {
if (last.getBit(index))
fFollowList[index].union(first);
}
}
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
// Recurse only
calcFollowList(((XSCMUniOp)nodeCur).getChild());
}
}
Dumps the tree of the current node to standard output.
Params: - nodeCur – The current node.
- level – The maximum levels to output.
Throws: - RuntimeException – Thrown on error.
/**
* Dumps the tree of the current node to standard output.
*
* @param nodeCur The current node.
* @param level The maximum levels to output.
*
* @exception RuntimeException Thrown on error.
*/
private void dumpTree(CMNode nodeCur, int level) {
for (int index = 0; index < level; index++)
System.out.print(" ");
int type = nodeCur.type();
switch(type ) {
case XSModelGroupImpl.MODELGROUP_CHOICE:
case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
if (type == XSModelGroupImpl.MODELGROUP_CHOICE)
System.out.print("Choice Node ");
else
System.out.print("Seq Node ");
if (nodeCur.isNullable())
System.out.print("Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1);
dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1);
break;
}
case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
case XSParticleDecl.PARTICLE_ONE_OR_MORE:
case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
System.out.print("Rep Node ");
if (nodeCur.isNullable())
System.out.print("Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1);
break;
}
case XSParticleDecl.PARTICLE_ELEMENT: {
System.out.print
(
"Leaf: (pos="
+ ((XSCMLeaf)nodeCur).getPosition()
+ "), "
+ "(elemIndex="
+ ((XSCMLeaf)nodeCur).getLeaf()
+ ") "
);
if (nodeCur.isNullable())
System.out.print(" Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
break;
}
case XSParticleDecl.PARTICLE_WILDCARD:
System.out.print("Any Node: ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
break;
default: {
throw new RuntimeException("ImplementationMessages.VAL_NIICM");
}
}
}
-1 is used to represent bad transitions in the transition table
entry for each state. So each entry is initialized to an all -1
array. This method creates a new entry and initializes it.
/**
* -1 is used to represent bad transitions in the transition table
* entry for each state. So each entry is initialized to an all -1
* array. This method creates a new entry and initializes it.
*/
private int[] makeDefStateList()
{
int[] retArray = new int[fElemMapSize];
for (int index = 0; index < fElemMapSize; index++)
retArray[index] = -1;
return retArray;
}
Post tree build initialization. /** Post tree build initialization. */
private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
// Set the maximum states on this node
nodeCur.setMaxStates(fLeafCount);
XSCMLeaf leaf = null;
int pos = 0;
// Recurse as required
if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
leaf = (XSCMLeaf)nodeCur;
pos = leaf.getPosition();
fLeafList[pos] = leaf;
fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
}
else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) ||
(nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft());
postTreeBuildInit(((XSCMBinOp)nodeCur).getRight());
}
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
postTreeBuildInit(((XSCMUniOp)nodeCur).getChild());
}
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
// Put this node in the leaf list at the current index if its
// a non-epsilon leaf.
leaf = (XSCMLeaf)nodeCur;
pos = leaf.getPosition();
fLeafList[pos] = leaf;
fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
}
else {
throw new RuntimeException("ImplementationMessages.VAL_NIICM");
}
}
check whether this content violates UPA constraint.
Params: - subGroupHandler – the substitution group handler
Returns: true if this content model contains other or list wildcard
/**
* check whether this content violates UPA constraint.
*
* @param subGroupHandler the substitution group handler
* @return true if this content model contains other or list wildcard
*/
public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException {
// Unique Particle Attribution
// store the conflict results between any two elements in fElemMap
// 0: not compared; -1: no conflict; 1: conflict
// initialize the conflict table (all 0 initially)
byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];
// for each state, check whether it has overlap transitions
for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) {
for (int j = 0; j < fElemMapSize; j++) {
for (int k = j+1; k < fElemMapSize; k++) {
if (fTransTable[i][j] != -1 &&
fTransTable[i][k] != -1) {
if (conflictTable[j][k] == 0) {
if (XSConstraints.overlapUPA
(fElemMap[j], fElemMap[k],
subGroupHandler)) {
if (fCountingStates != null) {
Occurence o = fCountingStates[i];
// If "i" is a counting state and exactly one of the transitions
// loops back to "i" then the two particles do not overlap if
// minOccurs == maxOccurs.
if (o != null &&
fTransTable[i][j] == i ^ fTransTable[i][k] == i &&
o.minOccurs == o.maxOccurs) {
conflictTable[j][k] = (byte) -1;
continue;
}
}
conflictTable[j][k] = (byte) 1;
}
else {
conflictTable[j][k] = (byte) -1;
}
}
}
}
}
}
// report all errors
for (int i = 0; i < fElemMapSize; i++) {
for (int j = 0; j < fElemMapSize; j++) {
if (conflictTable[i][j] == 1) {
//errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
// fElemMap[j].toString()});
// REVISIT: do we want to report all errors? or just one?
throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(),
fElemMap[j].toString()});
}
}
}
// if there is a other or list wildcard, we need to check this CM
// again, if this grammar is cached.
for (int i = 0; i < fElemMapSize; i++) {
if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i];
if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST ||
wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
return true;
}
}
}
return false;
}
Check which elements are valid to appear at this point. This method also
works if the state is in error, in which case it returns what should
have been seen.
Params: - state – the current state
Returns: a list whose entries are instances of
either XSWildcardDecl or XSElementDecl.
/**
* Check which elements are valid to appear at this point. This method also
* works if the state is in error, in which case it returns what should
* have been seen.
*
* @param state the current state
* @return a list whose entries are instances of
* either XSWildcardDecl or XSElementDecl.
*/
public List<Object> whatCanGoHere(int[] state) {
int curState = state[0];
if (curState < 0)
curState = state[1];
Occurence o = (fCountingStates != null) ?
fCountingStates[curState] : null;
int count = state[2];
// Can be XSElementDecl or XSWildcardDecl, but eventually the content is
// only used to evaluate toString
List<Object> ret = new ArrayList<>();
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
int nextState = fTransTable[curState][elemIndex];
if (nextState != -1) {
if (o != null) {
if (curState == nextState) {
// Do not include transitions which loop back to the
// current state if we've looped the maximum number
// of times or greater.
if (count >= o.maxOccurs &&
o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
continue;
}
}
// Do not include transitions which advance past the
// current state if we have not looped enough times.
else if (count < o.minOccurs) {
continue;
}
}
ret.add(fElemMap[elemIndex]);
}
}
return ret;
}
Used by constant space algorithm for a{n,m} for n > 1 and
m <= unbounded. Called by a validator if validation of
countent model succeeds after subsuming a{n,m} to a*
(or a+) to check the n and m bounds.
Returns null
if validation of bounds is
successful. Returns a list of strings with error info
if not. Even entries in list returned are error codes
(used to look up properties) and odd entries are parameters
to be passed when formatting error message. Each parameter
is associated with the error code that preceeds it in
the list.
/**
* Used by constant space algorithm for a{n,m} for n > 1 and
* m <= unbounded. Called by a validator if validation of
* countent model succeeds after subsuming a{n,m} to a*
* (or a+) to check the n and m bounds.
* Returns <code>null</code> if validation of bounds is
* successful. Returns a list of strings with error info
* if not. Even entries in list returned are error codes
* (used to look up properties) and odd entries are parameters
* to be passed when formatting error message. Each parameter
* is associated with the error code that preceeds it in
* the list.
*/
public List<String> checkMinMaxBounds() {
List<String> result = null;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
int count = fElemMapCounter[elemIndex];
if (count == -1) {
continue;
}
final int minOccurs = fElemMapCounterLowerBound[elemIndex];
final int maxOccurs = fElemMapCounterUpperBound[elemIndex];
if (count < minOccurs) {
if (result == null) result = new ArrayList<>();
result.add("cvc-complex-type.2.4.b");
result.add("{" + fElemMap[elemIndex] + "}");
}
if (maxOccurs != -1 && count > maxOccurs) {
if (result == null) result = new ArrayList<>();
result.add("cvc-complex-type.2.4.d.1");
result.add("{" + fElemMap[elemIndex] + "}");
}
}
return result;
}
public int [] occurenceInfo(int[] state) {
if (fCountingStates != null) {
int curState = state[0];
if (curState < 0) {
curState = state[1];
}
Occurence o = fCountingStates[curState];
if (o != null) {
int [] occurenceInfo = new int[4];
occurenceInfo[0] = o.minOccurs;
occurenceInfo[1] = o.maxOccurs;
occurenceInfo[2] = state[2];
occurenceInfo[3] = o.elemIndex;
return occurenceInfo;
}
}
return null;
}
public String getTermName(int termId) {
Object term = fElemMap[termId];
return (term != null) ? term.toString() : null;
}
public boolean isCompactedForUPA() {
return fIsCompactedForUPA;
}
} // class DFAContentModel