/* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.evt;
import java.util.NoSuchElementException;
import javax.xml.stream.*;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
import javax.xml.stream.util.XMLEventAllocator;
import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLStreamReader2;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.exc.WstxParsingException;
import com.ctc.wstx.sr.StreamScanner;
/**
* Woodstox version of {@link XMLEventReader2} (and {@link XMLEventReader}).
*<p>
* NOTE: up to Woodstox 5.1, this was based on the Stax2 Reference Implementation
* ({@link org.codehaus.stax2.ri.Stax2EventReaderImpl}), but due to various issues
* it has temporarily (?) been cut-paste-modified here. Ideally it would be reconciled
* once Stax2-api version 4.2 can be relied upon as the baseline, but that may take time.
*/
public class WstxEventReader
// extends Stax2EventReaderImpl // before 5.2
implements XMLEventReader2, XMLStreamConstants
{
// // // Enumerated state ids
protected final static int STATE_INITIAL = 1;
protected final static int STATE_END_OF_INPUT = 2;
protected final static int STATE_CONTENT = 3;
// // // Enumerated error case ids
/**
* Error case: current event was not START_ELEMENT when
* getElementText() was called
*/
protected final static int ERR_GETELEMTEXT_NOT_START_ELEM = 1;
/**
* Error case: encountered a non-textual event (other than the closing
* END_ELEMENT) when collecting text for getElementText()
*/
protected final static int ERR_GETELEMTEXT_NON_TEXT_EVENT = 2;
/**
* Error case: encountered CHARACTERS or CDATA that contains non-white space
* char(s), when trying to locate a tag with nextTag()
*/
protected final static int ERR_NEXTTAG_NON_WS_TEXT = 3;
/**
* Error case: encountered a non-skippable non-text/element event
* with nextTag()
*/
protected final static int ERR_NEXTTAG_WRONG_TYPE = 4;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected final XMLEventAllocator mAllocator;
protected final XMLStreamReader2 mReader;
/*
/**********************************************************************
/* State
/**********************************************************************
*/
/**
* Event that has been peeked, ie. loaded without call to
* {@link #nextEvent}; will be returned and cleared by
* call to {@link #nextEvent} (or, returned again if peeked
* again)
*/
protected XMLEvent mPeekedEvent = null;
/**
* High-level state indicator, with currently three values:
* whether we are initializing (need to synthesize START_DOCUMENT),
* at END_OF_INPUT (end-of-doc), or otherwise, normal operation.
* Useful in simplifying some methods, as well as to make sure
* that, independent of how the stream reader handles things, the event
* reader can reliably detect End-Of-Document.
*/
protected int mState = STATE_INITIAL;
/**
* This variable keeps track of the type of the 'previous' event
* when peeking for the next event. It is needed for some functionality,
* to remember state even when the underlying parser has to move to peek
* the next event.
*/
protected int mPrePeekEvent = START_DOCUMENT;
/*
/**********************************************************************
/* Woodstox-specific
/**********************************************************************
*/
/**
* Marker flag to allow specialized handling in "multi-document" reading
* mode.
*/
protected final boolean mCfgMultiDocMode;
/*
/**********************************************************************
/* Construction
/**********************************************************************
*/
public WstxEventReader(XMLEventAllocator a, XMLStreamReader2 r)
{
mAllocator = a;
mReader = r;
mCfgMultiDocMode = (r instanceof StreamScanner)
&& ((StreamScanner) r).getConfig().inputParsingModeDocuments();
}
/*
/**********************************************************************
/* Abstract methods that Stax2EventReaderImpl would expose
/**********************************************************************
*/
/**
 * Checks whether the named property is supported, by asking the
 * stream reader returned by {@link #getStreamReader}.
 *
 * @param name Name of the property to check
 * @return Whatever the stream reader reports for the property
 */
@Override
public boolean isPropertySupported(String name)
{
    final XMLStreamReader2 sr = (XMLStreamReader2) getStreamReader();
    return sr.isPropertySupported(name);
}
/**
 * Sets the named property on the stream reader returned by
 * {@link #getStreamReader}.
 *
 * @param name Name of the property to set
 * @param value Value to assign to the property
 * @return Whatever the stream reader returns from its own setProperty()
 */
@Override
public boolean setProperty(String name, Object value)
{
    final XMLStreamReader2 sr = (XMLStreamReader2) getStreamReader();
    return sr.setProperty(name, value);
}
Method called upon encountering a problem that should result
in an exception being thrown. If non-null String is returned.
that will be used as the message of exception thrown; if null,
a standard message will be used instead.
Params: - errorType – Type of the problem, one of
ERR_
constants - currEvent – Type of the event that triggered the problem,
if any; -1 if not available.
/**
* Method called upon encountering a problem that should result
* in an exception being thrown. If non-null String is returned.
* that will be used as the message of exception thrown; if null,
* a standard message will be used instead.
*
* @param errorType Type of the problem, one of <code>ERR_</code>
* constants
* @param currEvent Type of the event that triggered the problem,
* if any; -1 if not available.
*/
protected String getErrorDesc(int errorType, int currEvent)
{
// Defaults are mostly fine, except we can easily add event type desc
switch (errorType) {
case ERR_GETELEMTEXT_NOT_START_ELEM:
return ErrorConsts.ERR_STATE_NOT_STELEM+", got "+ErrorConsts.tokenTypeDesc(currEvent);
case ERR_GETELEMTEXT_NON_TEXT_EVENT:
return "Expected a text token, got "+ErrorConsts.tokenTypeDesc(currEvent);
case ERR_NEXTTAG_NON_WS_TEXT:
return "Only all-whitespace CHARACTERS/CDATA (or SPACE) allowed for nextTag(), got "+ErrorConsts.tokenTypeDesc(currEvent);
case ERR_NEXTTAG_WRONG_TYPE:
return "Got "+ErrorConsts.tokenTypeDesc(currEvent)+", instead of START_ELEMENT, END_ELEMENT or SPACE";
}
return null;
}
/*
/**********************************************************************
/* XMLEventReader API
/**********************************************************************
*/
/**
* Closes this event reader by closing the underlying stream reader.
*
* @throws XMLStreamException if thrown by the underlying reader's close()
*/
@Override
public void close() throws XMLStreamException
{
mReader.close();
}
/**
 * Collects and returns the textual content of the current element.
 *<p>
 * If no event has been peeked, the call is forwarded to the underlying
 * stream reader. Otherwise the stream reader has already moved past the
 * START_ELEMENT, so the text is collected here, starting from the peeked
 * event: character events are concatenated, comments and processing
 * instructions are skipped, and collection stops at the first END_ELEMENT.
 *
 * @return Concatenated text; empty String if there was no text
 *
 * @throws XMLStreamException If the event before the peek was not
 *    START_ELEMENT, if a non-textual event is encountered, or if reading
 *    the next event fails
 */
@Override
public String getElementText() throws XMLStreamException
{
    /* Simple, if no peeking occurred: can just forward this to the
     * underlying parser
     */
    if (mPeekedEvent == null) {
        return mReader.getElementText();
    }

    XMLEvent evt = mPeekedEvent;
    mPeekedEvent = null;

    /* Otherwise need to verify that we are currently over START_ELEMENT.
     * Problem is we have already gone past it, so rely on the event type
     * recorded at the time of peeking.
     */
    if (mPrePeekEvent != START_ELEMENT) {
        reportProblem(findErrorDesc(ERR_GETELEMTEXT_NOT_START_ELEM, mPrePeekEvent));
    }
    // ??? do we need to update mPrePeekEvent now

    // Common case is a single text segment: only allocate a builder
    // when a second segment shows up.
    String str = null;
    StringBuilder sb = null;

    // Ok, fine, then just need to loop through and get all the text...
    for (; true; evt = nextEvent()) {
        if (evt.isEndElement()) {
            break;
        }
        int type = evt.getEventType();
        if (type == COMMENT || type == PROCESSING_INSTRUCTION) {
            // can/should just ignore them
            continue;
        }
        if (!evt.isCharacters()) {
            reportProblem(findErrorDesc(ERR_GETELEMTEXT_NON_TEXT_EVENT, type));
        }
        String curr = evt.asCharacters().getData();
        if (str == null) {
            str = curr;
        } else {
            if (sb == null) {
                sb = new StringBuilder(str.length() + curr.length());
                sb.append(str);
            }
            sb.append(curr);
        }
    }

    if (sb != null) {
        return sb.toString();
    }
    return (str == null) ? "" : str;
}
/**
* Looks up a property by delegating to the underlying stream reader.
*
* @param name Name of the property to look up
* @return Whatever the underlying stream reader returns for the name
*/
@Override
public Object getProperty(String name) {
return mReader.getProperty(name);
}
/**
* Indicates whether more events may be read.
*
* @return True unless the reader state is <code>STATE_END_OF_INPUT</code>
*/
@Override
public boolean hasNext() {
// Based purely on the high-level state; the stream reader is not consulted
return (mState != STATE_END_OF_INPUT);
}
/**
 * Returns the next event: the START_DOCUMENT event on the very first
 * call, then a previously peeked event if there is one, and otherwise
 * an event built after advancing the underlying stream reader.
 *
 * @return Next event
 *
 * @throws XMLStreamException If advancing the stream reader or building
 *    the event fails
 */
@Override
public XMLEvent nextEvent() throws XMLStreamException
{
    switch (mState) {
    case STATE_END_OF_INPUT:
        throwEndOfInput();
        break;
    case STATE_INITIAL:
        // First call: hand out START_DOCUMENT without advancing the reader
        mState = STATE_CONTENT;
        return createStartDocumentEvent();
    default:
        break;
    }

    final XMLEvent peeked = mPeekedEvent;
    if (peeked == null) {
        return createNextEvent(true, mReader.next());
    }
    // Hand out (and forget) the event loaded earlier by peek()
    mPeekedEvent = null;
    if (peeked.isEndDocument()) {
        updateStateEndDocument();
    }
    return peeked;
}
/**
 * Iterator-style variant of {@link #nextEvent}: any
 * {@link XMLStreamException} is passed to {@link #throwUnchecked}
 * instead of being declared.
 *
 * @return Next event; null only if {@link #throwUnchecked} returns
 *    normally
 */
@Override
public Object next() {
    try {
        return nextEvent();
    } catch (XMLStreamException e) {
        throwUnchecked(e);
    }
    return null;
}
/**
 * Skips ignorable events (SPACE, comments, processing instructions,
 * all-white-space CHARACTERS/CDATA, and a peeked START_DOCUMENT) until
 * a START_ELEMENT or END_ELEMENT is found, and returns that event.
 *<p>
 * If END_DOCUMENT is reached first, null is returned; before that,
 * {@link #updateStateEndDocument} is called so that {@link #hasNext},
 * {@link #hasNextEvent} and {@link #peek} correctly reflect end-of-input
 * (or, in multi-document mode, the start of the next document).
 *
 * @return Next START_ELEMENT or END_ELEMENT event; null if END_DOCUMENT
 *    was encountered first
 *
 * @throws XMLStreamException If non-white-space text or another
 *    non-skippable event is encountered, or if advancing the stream
 *    reader fails
 */
@Override
public XMLEvent nextTag() throws XMLStreamException
{
    // If we have peeked something, need to process it
    if (mPeekedEvent != null) {
        XMLEvent evt = mPeekedEvent;
        mPeekedEvent = null;
        int type = evt.getEventType();
        switch (type) {
        case END_DOCUMENT:
            // Must record end-of-input, same as nextEvent() does for
            // a peeked END_DOCUMENT
            updateStateEndDocument();
            return null;
        case START_DOCUMENT:
            // Need to skip START_DOCUMENT to get the root elem
            break;
        case SPACE:
            // Ignorable WS is just fine
            break;

            /* !!! 07-Dec-2004, TSa: Specs are mum about Comments and PIs.
             *  But why would they not be skipped just like what
             *  the stream reader does?
             */
        case COMMENT:
        case PROCESSING_INSTRUCTION:
            break;
        case CDATA:
        case CHARACTERS:
            if (((Characters) evt).isWhiteSpace()) {
                break;
            }
            reportProblem(findErrorDesc(ERR_NEXTTAG_NON_WS_TEXT, type));
            break; // never gets here, but some compilers whine without...
        case START_ELEMENT:
        case END_ELEMENT:
            return evt;

        default:
            reportProblem(findErrorDesc(ERR_NEXTTAG_WRONG_TYPE, type));
        }
    } else {
        /* 13-Sep-2005, TSa: As pointed out by Patrick, we may need to
         *   initialize the state here, too; otherwise peek() won't work
         *   correctly. The problem is that following loop's get method
         *   does not use event reader's method but underlying reader's.
         *   As such, it won't update state: most importantly, initial
         *   state may not be changed to non-initial.
         */
        if (mState == STATE_INITIAL) {
            mState = STATE_CONTENT;
        }
    }

    while (true) {
        int next = mReader.next();

        switch (next) {
        case END_DOCUMENT:
            // The stream reader is exhausted (or at a document boundary
            // in multi-document mode): update state before returning
            updateStateEndDocument();
            return null;
        case SPACE:
        case COMMENT:
        case PROCESSING_INSTRUCTION:
            continue;
        case CDATA:
        case CHARACTERS:
            if (mReader.isWhiteSpace()) {
                continue;
            }
            reportProblem(findErrorDesc(ERR_NEXTTAG_NON_WS_TEXT, next));
            break; // just to keep Jikes happy...

        case START_ELEMENT:
        case END_ELEMENT:
            return createNextEvent(false, next);

        default:
            reportProblem(findErrorDesc(ERR_NEXTTAG_WRONG_TYPE, next));
        }
    }
}
/**
 * Returns the event that the next call to {@link #nextEvent} will
 * return, without consuming it. Repeated calls return the same event
 * until it is consumed.
 *
 * @return Peeked event; null if the reader is at end of input
 *
 * @throws XMLStreamException If advancing the stream reader or building
 *    the event fails
 */
@Override
public XMLEvent peek() throws XMLStreamException
{
    // Already peeked: just hand out the same event again
    if (mPeekedEvent != null) {
        return mPeekedEvent;
    }
    // 06-Mar-2006, TSa: Fixed as per Arjen's suggestion: return null
    //   at end of input, instead of calling throwEndOfInput()
    if (mState == STATE_END_OF_INPUT) {
        return null;
    }
    if (mState == STATE_INITIAL) {
        // Not sure what it should be... but this should do:
        mPrePeekEvent = START_DOCUMENT;
        mPeekedEvent = createStartDocumentEvent();
        mState = STATE_CONTENT;
    } else {
        // Remember where the reader was before it gets advanced
        mPrePeekEvent = mReader.getEventType();
        mPeekedEvent = createNextEvent(false, mReader.next());
    }
    return mPeekedEvent;
}
Note: only here because we implement Iterator interface. Will not
work, don't bother calling it.
/**
* Note: only here because we implement Iterator interface. Will not
* work, don't bother calling it.
*/
@Override
public void remove() {
throw new UnsupportedOperationException("Can not remove events from XMLEventReader.");
}
Method called when we are about to return END_DOCUMENT
event.
Usually this should change state to STATE_END_OF_INPUT
, but
may vary for some alternative read modes (like multi-document)
Since: 4.2
/**
* Method called when we are about to return <code>END_DOCUMENT</code> event.
* Usually this should change state to <code>STATE_END_OF_INPUT</code>, but
* may vary for some alternative read modes (like multi-document)
*
* @since 4.2
*/
protected void updateStateEndDocument() throws XMLStreamException {
if (mCfgMultiDocMode) {
// As per [woodstox-core#42] should allow reading over multiple documents...
if (mReader.hasNext()) {
// Let's sanity-check that we get token we expect however:
int next = mReader.next();
if (next == START_DOCUMENT) {
mPrePeekEvent = START_DOCUMENT;
mPeekedEvent = createStartDocumentEvent();
mState = STATE_CONTENT;
return;
}
reportProblem("Unexpected token ("+ErrorConsts.tokenTypeDesc(next)
+") after END_DOCUMENT in multi-document mode, XMLStreamReader.hasNext() returning true");
}
}
mState = STATE_END_OF_INPUT;
}
/*
/**********************************************************************
/* XMLEventReader2 API
/**********************************************************************
*/
Note: although the interface allows implementations to throw an XMLStreamException
, the reference implementation doesn't currently need to. It's still declared, in case in future there is need to throw such an exception.
/**
*<p>
* Note: although the interface allows implementations to
* throw an {@link XMLStreamException}, the reference implementation
* doesn't currently need to.
* It's still declared, in case in future there is need to throw
* such an exception.
*/
@Override
public boolean hasNextEvent() throws XMLStreamException
{
return (mState != STATE_END_OF_INPUT);
}
/*
/**********************************************************************
/* Overridable factory methods
/**********************************************************************
*/
/**
 * Builds the event for the stream reader's current position, using
 * the configured allocator.
 *
 * @param checkEOD Whether {@link #updateStateEndDocument} should be
 *    called when <code>type</code> is END_DOCUMENT
 * @param type Event type the stream reader just returned
 *
 * @return Newly allocated event
 *
 * @throws XMLStreamException If allocation fails; this includes a
 *    stream exception found in the cause chain of a RuntimeException
 */
protected XMLEvent createNextEvent(boolean checkEOD, int type)
    throws XMLStreamException
{
    XMLEvent allocated;
    try {
        allocated = mAllocator.allocate(mReader);
        if (checkEOD && type == END_DOCUMENT) {
            updateStateEndDocument();
        }
    } catch (RuntimeException rex) {
        // May wrap a "real" XMLStreamException: expose that if so
        throw _checkUnwrap(rex);
    }
    return allocated;
}
/**
 * Looks for an {@link XMLStreamException} in the cause chain of the
 * given exception.
 *<p>
 * 29-Mar-2008, TSa: Due to some problems with Stax API
 * (lack of 'throws XMLStreamException' in signature of
 * XMLStreamReader.getText(), for one) it is possible
 * we will get a wrapped XMLStreamException. If so,
 * we should be able to unwrap it.
 *
 * @param rex Runtime exception that was caught
 *
 * @return First XMLStreamException found in the cause chain
 *
 * @throws RuntimeException The given exception itself, if its cause
 *    chain contains no XMLStreamException
 */
protected XMLStreamException _checkUnwrap(RuntimeException rex)
{
    for (Throwable cause = rex.getCause(); cause != null; cause = cause.getCause()) {
        if (cause instanceof XMLStreamException) {
            return (XMLStreamException) cause;
        }
    }
    // Nope, need to re-throw as is
    throw rex;
}
Method called to create the very first event (START_DOCUMENT).
/**
* Method called to create the very first event (START_DOCUMENT).
*/
protected XMLEvent createStartDocumentEvent()
throws XMLStreamException
{
XMLEvent start = mAllocator.allocate(mReader);
return start;
}
/*
/**********************************************************************
/* Overridable error reporting methods
/**********************************************************************
*/
// note: `private` before 4.2
/**
* Signals an attempt to read past the end of input; called by
* nextEvent() when state is <code>STATE_END_OF_INPUT</code>.
*
* @throws NoSuchElementException Always
*/
protected void throwEndOfInput()
{
throw new NoSuchElementException();
}
/**
 * Re-throws the given stream exception in unchecked form. If it has a
 * nested exception, that one is used instead (only one layer is
 * unwrapped). Runtime exceptions and errors are thrown as is; anything
 * else is wrapped in a {@link RuntimeException}.
 *
 * @param sex Stream exception to convert and throw
 */
protected void throwUnchecked(XMLStreamException sex)
{
    // Wrapped root cause? Let's only unwrap one layer; one that
    // must have been used to expose the problem (if any)
    final Throwable nested = sex.getNestedException();
    final Throwable root = (nested != null) ? nested : sex;

    // Unchecked? Can re-throw as is
    if (root instanceof RuntimeException) {
        throw (RuntimeException) root;
    }
    if (root instanceof Error) {
        throw (Error) root;
    }
    // Otherwise, let's just wrap it
    throw new RuntimeException("[was "+root.getClass()+"] "+root.getMessage(), root);
}
/**
 * Reports a problem at the stream reader's current location.
 *
 * @param msg Description of the problem
 *
 * @throws XMLStreamException Always (as thrown by the two-argument variant)
 */
protected void reportProblem(String msg)
    throws XMLStreamException
{
    final Location where = mReader.getLocation();
    reportProblem(msg, where);
}

/**
 * Reports a problem by throwing a {@link WstxParsingException}.
 *
 * @param msg Description of the problem
 * @param loc Location to attach to the exception; if null, the
 *    exception is created without one
 *
 * @throws XMLStreamException Always
 */
protected void reportProblem(String msg, Location loc)
    throws XMLStreamException
{
    throw (loc == null)
        ? new WstxParsingException(msg)
        : new WstxParsingException(msg, loc);
}
/*
/**********************************************************************
/* Package methods for sub-classes
/**********************************************************************
*/
/**
* Accessor for the underlying stream reader; used by
* isPropertySupported() and setProperty().
*
* @return The stream reader this event reader was constructed with
*/
protected XMLStreamReader getStreamReader()
{
return mReader;
}
/*
/**********************************************************************
/* Other internal methods
/**********************************************************************
*/
// note: `private` before 4.2
Method used to locate error message description to use.
Calls sub-classes getErrorDesc()
first, and only
if no message found, uses default messages defined here.
/**
* Method used to locate error message description to use.
* Calls sub-classes <code>getErrorDesc()</code> first, and only
* if no message found, uses default messages defined here.
*/
protected final String findErrorDesc(int errorType, int currEvent)
{
String msg = getErrorDesc(errorType, currEvent);
if (msg != null) {
return msg;
}
switch (errorType) {
case ERR_GETELEMTEXT_NOT_START_ELEM:
return "Current state not START_ELEMENT when calling getElementText()";
case ERR_GETELEMTEXT_NON_TEXT_EVENT:
return "Expected a text token";
case ERR_NEXTTAG_NON_WS_TEXT:
return "Only all-whitespace CHARACTERS/CDATA (or SPACE) allowed for nextTag()";
case ERR_NEXTTAG_WRONG_TYPE:
return "Should only encounter START_ELEMENT/END_ELEMENT, SPACE, or all-white-space CHARACTERS";
}
// should never happen, but it'd be bad to throw another exception...
return "Internal error (unrecognized error type: "+errorType+")";
}
}