/* Woodstox XML processor
 * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.msv;

import java.util.*;

import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.validation.*;
import org.relaxng.datatype.Datatype;

import com.sun.msv.grammar.IDContextProvider2;
import com.sun.msv.util.DatatypeRef;
import com.sun.msv.util.StartTagInfo;
import com.sun.msv.util.StringRef;
import com.sun.msv.verifier.Acceptor;
import com.sun.msv.verifier.DocumentDeclaration;
import com.sun.msv.verifier.regexp.StringToken;
import com.ctc.wstx.util.ElementId;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.TextAccumulator;

Generic validator instance to be used for all Multi-Schema Validator backed implementations. A common class can be used since functionality is almost identical between variants (RNG, W3C Schema); minor differences that exist can be configured by settings provided.

Note about id context provider interface: while it'd be nice to separate that part out, it is unfortunately closely tied to the validation process. Hence it's directly implemented by this class.

/**
 * Generic validator instance to be used for all Multi-Schema Validator
 * backed implementations. A common class can be used since functionality
 * is almost identical between variants (RNG, W3C Schema); minor
 * differences that exist can be configured by settings provided.
 *<p>
 * Note about id context provider interface: while it'd be nice to
 * separate that part out, it is unfortunately closely tied to the
 * validation process. Hence it's directly implemented by this class.
 */
public final class GenericMsvValidator
    extends XMLValidator
    implements com.sun.msv.grammar.IDContextProvider2, XMLStreamConstants
{
/*
///////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////
*/

/** Schema this validator was created for; returned as-is by {@code getSchema()}. */
final XMLValidationSchema mParentSchema;

/**
 * Validation context of the stream being validated; used for base URI,
 * namespace, notation and entity lookups, for the current validation
 * location and for reporting problems.
 */
final ValidationContext mContext;

/** MSV grammar handle; the constructor asks it for the initial (root) acceptor. */
final DocumentDeclaration mVGM;

/*
///////////////////////////////////////////////////////////
// State, helper objects
///////////////////////////////////////////////////////////
*/

/**
 * Stack of acceptors for currently open elements: an entry is appended
 * by {@code validateElementStart} and removed by {@code validateElementEnd}.
 * Declared with {@code Object} elements, so readers cast entries back
 * to {@link Acceptor}.
 */
final ArrayList<Object> mAcceptors = new ArrayList<Object>();

/**
 * Acceptor for the innermost open element; {@code null} once the
 * validated (sub-)tree has been closed, in which case validation
 * calls are skipped.
 */
Acceptor mCurrAcceptor = null;

/** Collects text segments until they are handed to the current acceptor as one String. */
final TextAccumulator mTextAccumulator = new TextAccumulator();
Map that contains information about element id (values of attributes or textual content with type ID) declarations and references
/**
 * Map that contains information about element id (values of attributes
 * or textual content with type ID) declarations and references.
 * Created lazily by the first {@code onID} callback; remains
 * {@code null} until then.
 */
ElementIdMap mIdDefs;

/*
///////////////////////////////////////////////////////////
// State, positions
///////////////////////////////////////////////////////////
*/

/**
 * Prefix of the attribute most recently passed to
 * {@code validateAttribute}; reset to empty String by
 * {@code validateElementAndAttributes}.
 */
String mCurrAttrPrefix;

/**
 * Local name of the attribute most recently passed to
 * {@code validateAttribute}; reset to empty String by
 * {@code validateElementAndAttributes}.
 */
String mCurrAttrLocalName;
Sometimes a problem object has to be temporarily stored, and only reported later on. This happens when exceptions can not be thrown via code outside of Woodstox (like validation methods in MSV that do callbacks).
/**
 * Sometimes a problem object has to be temporarily
 * stored, and only reported later on. This happens
 * when exceptions can not be thrown via code outside
 * of Woodstox (like validation methods in MSV that do
 * callbacks).
 *<p>
 * Set by {@code onID} (duplicate id); reported and reset to
 * {@code null} by {@code validateElementStart} and
 * {@code validateAttribute}.
 */
XMLValidationProblem mProblem;

/*
///////////////////////////////////////////////////////////
// Helper objects
///////////////////////////////////////////////////////////
*/

/**
 * Reusable holder passed to MSV acceptor calls to receive an error
 * message; {@code reportError} reads its {@code str} and resets it
 * to {@code null}.
 */
final StringRef mErrorRef = new StringRef();
StartTagInfo instance need not be thread-safe, and it is not immutable so let's reuse one instance during a single validation.
/**
 * StartTagInfo instance need not be thread-safe, and it is not immutable
 * so let's reuse one instance during a single validation.
 *<p>
 * Starts out with empty names and no attributes or context;
 * {@code validateElementStart} re-initializes it for each element.
 */
final StartTagInfo mStartTag = new StartTagInfo("", "", "", null, (IDContextProvider2) null);
Since `StartTagInfo` has no place for prefix, hold reference to one here
/**
 * Since {@code StartTagInfo} has no place for prefix, hold reference to
 * one here. Updated by {@code validateElementStart} together with
 * {@code mStartTag}; may be {@code null} if the caller passed no prefix.
 */
protected String mStartTagPrefix = "";
This object provides limited access to attribute values of the currently validated element.
/**
 * Proxy that gives MSV limited access to the attribute values of the
 * element that is currently being validated.
 */
final AttributeProxy mAttributeProxy;

/*
///////////////////////////////////////////////////////////
// Construction, configuration
///////////////////////////////////////////////////////////
*/

/**
 * Creates a validator bound to given schema, context and grammar,
 * and obtains the initial acceptor from the grammar.
 *
 * @param parent Schema that created this validator
 * @param ctxt Context of the stream to validate
 * @param vgm Grammar from which the initial acceptor is created
 */
public GenericMsvValidator(XMLValidationSchema parent, ValidationContext ctxt,
        DocumentDeclaration vgm)
{
    this.mParentSchema = parent;
    this.mContext = ctxt;
    this.mVGM = vgm;
    // same object as mVGM, which was just assigned
    this.mCurrAcceptor = vgm.createAcceptor();
    this.mAttributeProxy = new AttributeProxy(ctxt);
}

/*
///////////////////////////////////////////////////////////
// IDContextProvider2 implementation:
//
// Core RelaxNG ValidationContext implementation
// (org.relaxng.datatype.ValidationContext, base interface
// of the id provider context)
///////////////////////////////////////////////////////////
*/

/**
 * @return Base URI, as reported by the validation context
 */
@Override
public String getBaseUri()
{
    final String baseUri = mContext.getBaseUri();
    return baseUri;
}

/**
 * @param notationName Name of the notation to check
 * @return True if the validation context knows a notation declared
 *   with given name
 */
@Override
public boolean isNotation(String notationName)
{
    final boolean declared = mContext.isNotationDeclared(notationName);
    return declared;
}

/**
 * @param entityName Name of the entity to check
 * @return True if the validation context knows an unparsed entity
 *   declared with given name
 */
@Override
public boolean isUnparsedEntity(String entityName)
{
    final boolean declared = mContext.isUnparsedEntityDeclared(entityName);
    return declared;
}

/**
 * @param prefix Namespace prefix to resolve
 * @return Namespace URI the validation context has bound to the prefix
 */
@Override
public String resolveNamespacePrefix(String prefix)
{
    final String nsUri = mContext.getNamespaceURI(prefix);
    return nsUri;
}

/*
///////////////////////////////////////////////////////////
// IDContextProvider2 implementation, extensions over
// core ValidationContext
///////////////////////////////////////////////////////////
*/

Note: we have to throw a dummy marker exception, which merely signals that a validation problem is to be reported. This is obviously messy, but has to do for now.

/** *<p> * Note: we have to throw a dummy marker exception, which merely * signals that a validation problem is to be reported. * This is obviously messy, but has to do for now. */
@Override public void onID(Datatype datatype, StringToken idToken) throws IllegalArgumentException { if (mIdDefs == null) { mIdDefs = new ElementIdMap(); } int idType = datatype.getIdType(); Location loc = mContext.getValidationLocation(); PrefixedName elemPName = getElementPName(); PrefixedName attrPName = getAttrPName(); if (idType == Datatype.ID_TYPE_ID) { String idStr = idToken.literal.trim(); ElementId eid = mIdDefs.addDefined(idStr, loc, elemPName, attrPName); // We can detect dups by checking if Location is the one we passed: if (eid.getLocation() != loc) { mProblem = new XMLValidationProblem(loc, "Duplicate id '"+idStr+"', first declared at "+eid.getLocation()); mProblem.setReporter(this); } } else if (idType == Datatype.ID_TYPE_IDREF) { String idStr = idToken.literal.trim(); mIdDefs.addReferenced(idStr, loc, elemPName, attrPName); } else if (idType == Datatype.ID_TYPE_IDREFS) { StringTokenizer tokens = new StringTokenizer(idToken.literal); while (tokens.hasMoreTokens()) { mIdDefs.addReferenced(tokens.nextToken(), loc, elemPName, attrPName); } } else { // sanity check throw new IllegalStateException("Internal error: unexpected ID datatype: "+datatype); } } /* /////////////////////////////////////////////////////////// // XMLValidator implementation /////////////////////////////////////////////////////////// */ @Override public XMLValidationSchema getSchema() { return mParentSchema; }
Method called to update information about the newly encountered (start) element. At this point namespace information has been resolved, but no DTD validation has been done. Validator is to do these validations, including checking for attribute value (and existence) compatibility.
/** * Method called to update information about the newly encountered (start) * element. At this point namespace information has been resolved, but * no DTD validation has been done. Validator is to do these validations, * including checking for attribute value (and existence) compatibility. */
@Override public void validateElementStart(String localName, String uri, String prefix) throws XMLStreamException { /* [WSTX-200]: If sub-tree we were to validate has ended, we * have no current acceptor, and must quite. Ideally we would * really handle this more cleanly but... */ if (mCurrAcceptor == null) { return; } // Very first thing: do we have text collected? if (mTextAccumulator.hasText()) { doValidateText(mTextAccumulator); } /* 31-Mar-2006, TSa: MSV seems to require empty String for empty/no * namespace, not null. */ if (uri == null) { uri = ""; } /* Do we need to properly fill it? Or could we just put local name? * Looking at code, I do believe it's only used for error reporting * purposes... */ //String qname = (prefix == null || prefix.length() == 0) ? localName : (prefix + ":" +localName); String qname = localName; mStartTag.reinit(uri, localName, qname, mAttributeProxy, this); mStartTagPrefix = prefix; mCurrAcceptor = mCurrAcceptor.createChildAcceptor(mStartTag, mErrorRef); /* As per documentation, the side-effect of getting the error message * is that we also get a recoverable non-null acceptor... thus, should * never (?) see null acceptor being returned */ if (mErrorRef.str != null) { reportError(mErrorRef, START_ELEMENT, _qname(uri, localName, prefix)); } if (mProblem != null) { // pending problems (to throw exception on)? XMLValidationProblem p = mProblem; mProblem = null; mContext.reportProblem(p); } mAcceptors.add(mCurrAcceptor); } @Override public String validateAttribute(String localName, String uri, String prefix, String value) throws XMLStreamException { mCurrAttrLocalName = localName; mCurrAttrPrefix = prefix; if (mCurrAcceptor != null) { String qname = localName; // for now, let's assume we don't need prefixed version DatatypeRef typeRef = null; // for now, let's not care /* 31-Mar-2006, TSa: MSV seems to require empty String for empty/no * namespace, not null. 
*/ if (uri == null) { uri = ""; } if (!mCurrAcceptor.onAttribute2(uri, localName, qname, value, this, mErrorRef, typeRef) || mErrorRef.str != null) { reportError(mErrorRef, ATTRIBUTE, _qname(uri, localName, prefix)); } if (mProblem != null) { // pending problems (to throw exception on)? XMLValidationProblem p = mProblem; mProblem = null; mContext.reportProblem(p); } } // No normalization done by RelaxNG, is there? (at least nothing // visible to callers that is) return null; } @Override public String validateAttribute(String localName, String uri, String prefix, char[] valueChars, int valueStart, int valueEnd) throws XMLStreamException { int len = valueEnd - valueStart; // This is very sub-optimal... but MSV doesn't deal with char arrays. return validateAttribute(localName, uri, prefix, new String(valueChars, valueStart, len)); } @Override public int validateElementAndAttributes() throws XMLStreamException { // Not handling any attributes mCurrAttrLocalName = mCurrAttrPrefix = ""; if (mCurrAcceptor != null) { /* start tag info is still intact here (only attributes sent * since child acceptor was created) */ if (!mCurrAcceptor.onEndAttributes(mStartTag, mErrorRef) || mErrorRef.str != null) { reportError(mErrorRef, XMLStreamConstants.END_ELEMENT, _startTagAsQName()); } int stringChecks = mCurrAcceptor.getStringCareLevel(); switch (stringChecks) { case Acceptor.STRING_PROHIBITED: // only WS return XMLValidator.CONTENT_ALLOW_WS; case Acceptor.STRING_IGNORE: // anything (mixed content models) return XMLValidator.CONTENT_ALLOW_ANY_TEXT; case Acceptor.STRING_STRICT: // validatable (data-oriented) return XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT; default: throw new IllegalArgumentException("Internal error: unexpected string care level value return by MSV: "+stringChecks); } } // If no acceptor, we are recovering, no need or use to validate text return CONTENT_ALLOW_ANY_TEXT; }
Returns: Validation state that should be effective for the parent element state
/** * @return Validation state that should be effective for the parent * element state */
@Override public int validateElementEnd(String localName, String uri, String prefix) throws XMLStreamException { // Very first thing: do we have text collected? /* 27-Feb-2009, TSa: [WSTX-191]: Actually MSV expects us to call * validation anyway, in case there might be restriction(s) on * textual content. Otherwise we'll get an error. */ doValidateText(mTextAccumulator); // [WSTX-200]: need to avoid problems when doing sub-tree // validation... not a proper solution, but has to do for now int lastIx = mAcceptors.size()-1; if (lastIx < 0) { return XMLValidator.CONTENT_ALLOW_WS; } Acceptor acc = (Acceptor)mAcceptors.remove(lastIx); if (acc != null) { // may be null during error recovery? or not? if (!acc.isAcceptState(mErrorRef) || mErrorRef.str != null) { reportError(mErrorRef, XMLStreamConstants.END_ELEMENT, _qname(uri, localName, prefix)); } } if (lastIx == 0) { // root closed mCurrAcceptor = null; } else { mCurrAcceptor = (Acceptor) mAcceptors.get(lastIx-1); } if (mCurrAcceptor != null && acc != null) { if (!mCurrAcceptor.stepForward(acc, mErrorRef) || mErrorRef.str != null) { reportError(mErrorRef, XMLStreamConstants.END_ELEMENT, _qname(uri, localName, prefix)); } int stringChecks = mCurrAcceptor.getStringCareLevel(); switch (stringChecks) { case Acceptor.STRING_PROHIBITED: // only WS return XMLValidator.CONTENT_ALLOW_WS; case Acceptor.STRING_IGNORE: // anything (mixed content models) return XMLValidator.CONTENT_ALLOW_ANY_TEXT; case Acceptor.STRING_STRICT: // validatable (data-oriented) return XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT; default: throw new IllegalArgumentException("Internal error: unexpected string care level value return by MSV: "+stringChecks); } } return XMLValidator.CONTENT_ALLOW_ANY_TEXT; } @Override public void validateText(String text, boolean lastTextSegment) throws XMLStreamException { /* If we got here, then it's likely we do need to call onText2(). 
* (not guaranteed, though; in case of multiple parallel validators, * only one of them may actually be interested) */ mTextAccumulator.addText(text); if (lastTextSegment) { doValidateText(mTextAccumulator); } } @Override public void validateText(char[] cbuf, int textStart, int textEnd, boolean lastTextSegment) throws XMLStreamException { /* If we got here, then it's likely we do need to call onText(). * (not guaranteed, though; in case of multiple parallel validators, * only one of them may actually be interested) */ mTextAccumulator.addText(cbuf, textStart, textEnd); if (lastTextSegment) { doValidateText(mTextAccumulator); } } @Override public void validationCompleted(boolean eod) throws XMLStreamException { /* Ok, so, we should verify that there are no undefined * IDREF/IDREFS references. But only if we hit EOF, not * if validation was cancelled. */ if (eod) { if (mIdDefs != null) { ElementId ref = mIdDefs.getFirstUndefined(); if (ref != null) { // problem! String msg = "Undefined ID '"+ref.getId() +"': referenced from element <" +ref.getElemName()+">, attribute '" +ref.getAttrName()+"'"; reportError(msg, ref.getLocation()); } } } } /* /////////////////////////////////////////////////////////// // Attribute info access /////////////////////////////////////////////////////////// */ // // // Access to type info @Override public String getAttributeType(int index) { // !!! TBI return null; } @Override public int getIdAttrIndex() { // !!! TBI return -1; } @Override public int getNotationAttrIndex() { // !!! 
TBI return -1; } /* /////////////////////////////////////////////////////////// // Internal methods /////////////////////////////////////////////////////////// */ PrefixedName getElementPName() { return PrefixedName.valueOf(mContext.getCurrentElementName()); } PrefixedName getAttrPName() { return new PrefixedName(mCurrAttrPrefix, mCurrAttrLocalName); } void doValidateText(TextAccumulator textAcc) throws XMLStreamException { if (mCurrAcceptor != null) { String str = textAcc.getAndClear(); DatatypeRef typeRef = null; if (!mCurrAcceptor.onText2(str, this, mErrorRef, typeRef) || mErrorRef.str != null) { reportError(mErrorRef, CDATA, _startTagAsQName()); } } } private void reportError(StringRef errorRef, int type, QName name) throws XMLStreamException { String msg = errorRef.str; errorRef.str = null; if (msg == null || msg.isEmpty()) { switch (type) { case START_ELEMENT: msg = "Unknown reason (at start element "+_name(name, "<", ">")+")"; break; case END_ELEMENT: msg = "Unknown reason (at end element "+_name(name, "</", ">")+")"; break; case ATTRIBUTE: msg = "Unknown reason (at attribute "+_name(name, "'", "'")+")"; break; case CDATA: default: msg = "Unknown reason (at CDATA section, inside element "+_name(name, "<", ">")+")"; break; } } reportError(msg); } private void reportError(String msg) throws XMLStreamException { reportError(msg, mContext.getValidationLocation()); } private void reportError(String msg, Location loc) throws XMLStreamException { XMLValidationProblem prob = new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR); prob.setReporter(this); mContext.reportProblem(prob); } private String _name(QName qn, String prefix, String suffix) { if (qn == null) { return "UNKNOWN"; } String name = qn.getLocalPart(); String p = qn.getPrefix(); if (p != null && !p.isEmpty()) { name = p + ":" + name; } return prefix + name + suffix; } private QName _startTagAsQName() { return _qname(mStartTag.namespaceURI, mStartTag.localName, mStartTagPrefix); } 
private QName _qname(String ns, String local, String prefix) { if (prefix == null) { prefix = ""; } if (ns == null) { ns = ""; } // should we even allow this? if (local == null) { local = ""; } return new QName(ns, local, prefix); } }