package com.fasterxml.aalto.in;
import java.io.*;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.XMLStreamLocation2;
import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.impl.IoStreamException;
import com.fasterxml.aalto.impl.LocationImpl;
import com.fasterxml.aalto.util.DataUtil;
import com.fasterxml.aalto.util.TextBuilder;
import com.fasterxml.aalto.util.XmlCharTypes;
import com.fasterxml.aalto.util.XmlChars;
import com.fasterxml.aalto.util.XmlConsts;
/**
 * This is the concrete scanner implementation used when input comes
 * as a {@link java.io.Reader}. In general, using this scanner is quite
 * a bit less efficient than using the {@link java.io.InputStream} based
 * scanner. Nonetheless, it is included for completeness, since the Stax
 * interface allows passing Readers as input sources.
 */
public final class ReaderScanner
extends XmlScanner
{
/**
 * Although Java chars are basically UTF-16 in memory, the closest
 * match among the character type tables is Latin-1.
 */
private final static XmlCharTypes sCharTypes = InputCharTypes.getLatin1CharTypes();
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
/**
 * Underlying {@link Reader} to use for reading content.
 */
protected Reader _in;
/*
/**********************************************************************
/* Input buffering
/**********************************************************************
*/
// Buffer holding the chars that are currently available for scanning
protected char[] _inputBuffer;
// Index of the next char to read from _inputBuffer
protected int _inputPtr;
// End marker for valid content in _inputBuffer (exclusive: more input is
// loaded once _inputPtr >= _inputEnd)
protected int _inputEnd;
/**
 * Storage location for a single character that cannot be pushed
 * back (for example, a multi-byte char).
 */
protected int mTmpChar = INT_NULL;
/*
/**********************************************************************
/* Symbol handling
/**********************************************************************
*/
/**
 * For now, the symbol table contains prefixed names. In the future it is
 * possible that they will be split into prefixes and local names.
 */
protected final CharBasedPNameTable _symbols;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Constructs a scanner that reads from the given Reader, using a
 * caller-supplied buffer.
 *
 * @param cfg Configuration passed to the base class; also the source of
 *   the symbol table
 * @param r Reader to read further content from
 * @param buffer Character buffer to use as the input buffer
 * @param ptr Initial value of the read pointer within {@code buffer}
 * @param last Initial value of the end marker for {@code buffer}
 */
public ReaderScanner(ReaderConfig cfg, Reader r,
        char[] buffer, int ptr, int last)
{
    super(cfg);
    // Symbol table comes from the configuration
    _symbols = cfg.getCBSymbols();

    // Input source and the buffer window handed in by the caller
    _in = r;
    _inputBuffer = buffer;
    _inputEnd = last;
    _inputPtr = ptr;

    // Location bookkeeping starts from zero (should these be passed by caller?)
    _rowStartOffset = 0;
    _pastBytesOrChars = 0;
}
/**
 * Constructs a scanner that reads from the given Reader, using an
 * input buffer allocated via the configuration object.
 *
 * @param cfg Configuration passed to the base class; also used for
 *   allocating the input buffer and obtaining the symbol table
 * @param r Reader to read content from
 */
public ReaderScanner(ReaderConfig cfg, Reader r)
{
    super(cfg);
    _in = r;
    // Buffer starts out empty: both pointers at zero
    _inputBuffer = cfg.allocFullCBuffer(ReaderConfig.DEFAULT_CHAR_BUFFER_LEN);
    _inputEnd = 0;
    _inputPtr = 0;

    // Location bookkeeping starts from zero (should these be passed by caller?)
    _rowStartOffset = 0;
    _pastBytesOrChars = 0;

    _symbols = cfg.getCBSymbols();
}
/**
 * Releases buffers held by this scanner: delegates to the base class,
 * hands a possibly modified symbol table back to the configuration,
 * and returns the input buffer to the configuration when a Reader is
 * still attached.
 */
@Override
protected void _releaseBuffers()
{
    super._releaseBuffers();

    if (_symbols.maybeDirty()) {
        _config.updateCBSymbols(_symbols);
    }

    /* With block input (_in == null) the buffer is not owned by the
     * scanner and must not be recycled. This method always gets
     * called before _closeSource(), so _in has not yet been cleared
     * when it is checked here.
     */
    final char[] buf = _inputBuffer;
    if (_in != null && buf != null) {
        _config.freeFullCBuffer(buf);
        _inputBuffer = null;
    }
}
/**
 * Closes the underlying Reader, if there is one, and clears the
 * reference to it.
 *
 * @throws IOException if closing the Reader fails
 */
@Override
protected void _closeSource() throws IOException
{
    final Reader r = _in;
    if (r == null) { // nothing to close
        return;
    }
    r.close();
    _in = null;
}
/*
/**********************************************************************
/* Public scanner interface (1st level parsing)
/**********************************************************************
*/
/**
 * Completes processing of the current token by dispatching to the
 * finish method that matches the current token type. The incomplete
 * flag is cleared before dispatching.
 *
 * @throws XMLStreamException if the type-specific finish method fails
 */
@Override
protected final void finishToken() throws XMLStreamException
{
    _tokenIncomplete = false;

    switch (_currToken) {
    case CHARACTERS:
        finishCharacters();
        return;
    case SPACE:
        finishSpace();
        return;
    case CDATA:
        finishCData();
        return;
    case COMMENT:
        finishComment();
        return;
    case PROCESSING_INSTRUCTION:
        finishPI();
        return;
    case DTD:
        // 'true' means that the text is to be collected
        finishDTD(true);
        return;
    default:
        // No other token type is handled here
        ErrorConsts.throwInternalError();
    }
}
// // // First, main iteration methods
/**
 * Scans the next event while outside of the root element: skips white
 * space up to the next '&lt;' and then dispatches on the character that
 * follows it.
 *
 * @param isProlog True when scanning before the root element, false
 *   when scanning after it
 *
 * @return TOKEN_EOI if input ends before a '&lt;' is found; otherwise the
 *   token type returned by the handler that was dispatched to
 *
 * @throws XMLStreamException for unexpected content or read problems
 */
@Override
public final int nextFromProlog(boolean isProlog) throws XMLStreamException
{
    if (_tokenIncomplete) { // left-overs from the previous token?
        skipToken();
    }

    // First: keep track of where event started
    setStartLocation();
    // Ok: we should get white space or '<'. So, let's skip through white space
    while (true) {
        // Any more data? Just need a single char
        if (_inputPtr >= _inputEnd) {
            if (!loadMore()) {
                setStartLocation();
                return TOKEN_EOI;
            }
        }
        /* Input consists of 16-bit chars, not bytes, so the value must
         * not be masked with 0xFF: masking would make for example
         * U+0120 look like a space and U+013C look like '<'.
         */
        int c = _inputBuffer[_inputPtr++];

        // Really should get white space or '<'...
        if (c == '<') {
            break;
        }
        if (c != ' ') {
            if (c == '\n') {
                markLF();
            } else if (c == '\r') {
                // Need to peek at the next char, to handle CR+LF as one linefeed
                if (_inputPtr >= _inputEnd) {
                    if (!loadMore()) {
                        markLF();
                        setStartLocation();
                        return TOKEN_EOI;
                    }
                }
                if (_inputBuffer[_inputPtr] == '\n') {
                    ++_inputPtr;
                }
                markLF();
            } else if (c != '\t') {
                reportPrologUnexpChar(isProlog, c, null);
            }
        }
    }

    // Ok, got LT:
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed(COMMENT);
    }
    char c = _inputBuffer[_inputPtr++];
    if (c == '!') { // comment/DOCTYPE? (CDATA not legal)
        return handlePrologDeclStart(isProlog);
    }
    if (c == '?') {
        return handlePIStart();
    }
    /* End tag not allowed if no open tree; and only one root
     * element (one root-level start tag)
     */
    if (c == '/' || !isProlog) {
        reportPrologUnexpElement(isProlog, c);
    }
    return handleStartElement(c);
}
/**
 * Scans the next event while inside the element tree: first deals with
 * state left over from the previous event, then looks at the next
 * input character to decide what kind of event follows.
 *
 * @return Type of the event found, or TOKEN_EOI if no more input is
 *   available
 *
 * @throws XMLStreamException for malformed content or read problems
 */
@Override
public final int nextFromTree() throws XMLStreamException
{
    if (!_tokenIncomplete) { // note: START_ELEMENT/END_ELEMENT never incomplete
        if (_currToken == START_ELEMENT) {
            if (_isEmptyTag) {
                // Important: START_ELEMENT's start location is retained, not overwritten
                --_depth;
                _currToken = END_ELEMENT;
                return _currToken;
            }
        } else if (_currToken == END_ELEMENT) {
            _currElem = _currElem.getParent();
            // Unbind namespace declarations that are at or below the current depth
            while (_lastNsDecl != null && _lastNsDecl.getLevel() >= _depth) {
                _lastNsDecl = _lastNsDecl.unbind();
            }
        } else if (_entityPending) {
            // Previous event was followed by an entity reference
            _entityPending = false;
            return _nextEntity();
        }
    } else if (skipToken()) { // skipping figured out next event (ENTITY_REFERENCE)
        // !!! DTD is not yet parsed, so real contents are not known
        return _nextEntity();
    }

    // Except for the special cases above, mark down actual start location
    setStartLocation();

    /* It would be an error not to get any more data here, but error
     * reporting is left to the caller.
     */
    if (_inputPtr >= _inputEnd && !loadMore()) {
        setStartLocation();
        return TOKEN_EOI;
    }

    char c = _inputBuffer[_inputPtr];

    if (c == '<') { // element, comment, CDATA or processing instruction
        ++_inputPtr;
        if (_inputPtr < _inputEnd) {
            c = _inputBuffer[_inputPtr++];
        } else {
            c = loadOne(COMMENT);
        }
        switch (c) {
        case '!': // comment or CDATA
            return handleCommentOrCdataStart();
        case '?':
            return handlePIStart();
        case '/':
            return handleEndElement();
        default:
            return handleStartElement(c);
        }
    }

    if (c != '&') {
        /* Plain text: store the char for later use, without advancing
         * the input pointer yet.
         */
        mTmpChar = c;
    } else { // entity reference
        ++_inputPtr;
        // Either a character entity (non-zero), or an unexpanded general entity (zero)
        final int ch = handleEntityInText(false);
        if (ch == 0) {
            _currToken = ENTITY_REFERENCE;
            return _currToken;
        }
        // Negated, to indicate that the char came from an entity
        mTmpChar = -ch;
    }

    // Text content: either defer, or read it all right away
    if (_cfgLazyParsing) {
        _tokenIncomplete = true;
    } else {
        finishCharacters();
    }
    _currToken = CHARACTERS;
    return _currToken;
}
/**
 * Helper method used to isolate things that need to be (re)set in
 * cases where the next event to return is an entity reference: the
 * text buffer is reset to empty, and the current token is changed to
 * ENTITY_REFERENCE.
 */
protected int _nextEntity() {
    // !!! Has to assume that start location has been set already
    // !!! TODO: handle start location?

    // No text content is associated with the reference
    _textBuilder.resetWithEmpty();
    _currToken = ENTITY_REFERENCE;
    return _currToken;
}
/*
/**********************************************************************
/* 2nd level parsing
/**********************************************************************
*/
/**
 * Handles content that starts with "&lt;!" outside of the root element:
 * this is either a comment or, in prolog only, a DOCTYPE declaration.
 *
 * @param isProlog True when scanning before the root element; DOCTYPE
 *   is only accepted in that case
 *
 * @return COMMENT or DTD, depending on what was found
 *
 * @throws XMLStreamException if neither construct is found, or for
 *   read problems
 */
protected final int handlePrologDeclStart(boolean isProlog) throws XMLStreamException
{
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char c = _inputBuffer[_inputPtr++];

    if (c == 'D') { // DOCTYPE?
        if (isProlog) { // no DOCTYPE in epilog
            handleDtdStart();
            // Incomplete flag was set by handleDtdStart(), if necessary
            if (!_cfgLazyParsing && _tokenIncomplete) {
                finishDTD(true); // must copy contents, may be needed
                _tokenIncomplete = false;
            }
            return DTD;
        }
    } else if (c == '-') { // Comment?
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c == '-') {
            if (_cfgLazyParsing) {
                _tokenIncomplete = true;
            } else {
                finishComment();
            }
            _currToken = COMMENT;
            return _currToken;
        }
    }

    /* Error; but for error recovery purposes, state is set up as if
     * this was unfinished CHARACTERS.
     */
    _tokenIncomplete = true;
    _currToken = CHARACTERS;
    reportPrologUnexpChar(isProlog, c, " (expected '-' for COMMENT)");
    return _currToken; // never gets here
}
/**
 * Parses the beginning of a DOCTYPE declaration: the keyword, the root
 * name and the optional PUBLIC/SYSTEM identifiers. The internal subset,
 * if there is one, is not parsed here; the token is marked incomplete
 * instead.
 *
 * @return DTD
 *
 * @throws XMLStreamException for malformed declarations or read problems
 */
private final int handleDtdStart()
    throws XMLStreamException
{
    matchAsciiKeyword("DOCTYPE");
    // Mandatory white space, followed by the root name
    char c = skipInternalWs(true, "after DOCTYPE keyword, before root name");
    _tokenName = parsePName(c);
    c = skipInternalWs(false, null);

    switch (c) {
    case 'P': // PUBLIC: public id followed by system id
        matchAsciiKeyword("PUBLIC");
        c = skipInternalWs(true, null);
        _publicId = parsePublicId(c);
        c = skipInternalWs(true, null);
        _systemId = parseSystemId(c);
        c = skipInternalWs(false, null);
        break;
    case 'S': // SYSTEM: system id only
        matchAsciiKeyword("SYSTEM");
        c = skipInternalWs(true, null);
        _publicId = null;
        _systemId = parseSystemId(c);
        c = skipInternalWs(false, null);
        break;
    default: // no external identifiers
        _systemId = null;
        _publicId = null;
    }

    // Now need to get either an internal subset, or the end of declaration
    if (c == '>') { // fine, all done
        _tokenIncomplete = false;
    } else {
        if (c != '[') { // if not the end, must have an internal subset
            String msg;
            if (_systemId != null) {
                msg = " (expected '[' for the internal subset, or '>' to end DOCTYPE declaration)";
            } else {
                msg = " (expected a 'PUBLIC' or 'SYSTEM' keyword, '[' for the internal subset, or '>' to end DOCTYPE declaration)";
            }
            reportTreeUnexpChar(c, msg);
        }
        /* Internal subset need not be parsed yet: it can be left as is,
         * and either skipped or parsed later on.
         */
        _tokenIncomplete = true;
    }
    _currToken = DTD;
    return _currToken;
}
/**
 * Handles content that starts with "&lt;!" inside the element tree: this
 * has to be either a comment or a CDATA section.
 *
 * @return COMMENT or CDATA, depending on what was found
 *
 * @throws XMLStreamException if neither construct is found, or for
 *   read problems
 */
protected final int handleCommentOrCdataStart()
    throws XMLStreamException
{
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char c = _inputBuffer[_inputPtr++];

    if (c == '[') { // CDATA: the next six chars must match the marker
        _currToken = CDATA;
        int matched = 0;
        while (matched < 6) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            final char expected = CDATA_STR.charAt(matched);
            if (c != expected) {
                reportTreeUnexpChar(c, " (expected '"+expected+"' for CDATA section)");
            }
            ++matched;
        }
        if (_cfgLazyParsing) {
            _tokenIncomplete = true;
        } else {
            finishCData();
        }
        return CDATA;
    }

    if (c == '-') { // Comment: needs a second hyphen
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c != '-') {
            reportTreeUnexpChar(c, " (expected '-' for COMMENT)");
        }
        if (_cfgLazyParsing) {
            _tokenIncomplete = true;
        } else {
            finishComment();
        }
        _currToken = COMMENT;
        return _currToken;
    }

    reportTreeUnexpChar(c, " (expected either '-' for COMMENT or '[CDATA[' for CDATA section)");
    return TOKEN_EOI; // never gets here
}
/**
 * Handles the start of a processing instruction, after "&lt;?" has been
 * read: parses the target name, verifies that it is not "xml" (in any
 * case), and checks that the target is followed by either white space
 * or the closing "?&gt;".
 *
 * @return PROCESSING_INSTRUCTION
 *
 * @throws XMLStreamException for malformed content or read problems
 */
protected final int handlePIStart()
    throws XMLStreamException
{
    _currToken = PROCESSING_INSTRUCTION;

    // First things first: need the target name
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char c = _inputBuffer[_inputPtr++];
    _tokenName = parsePName(c);

    // Target must not be "xml" (case-insensitive)
    final String target = _tokenName.getLocalName();
    if (target.length() == 3 && target.equalsIgnoreCase("xml")
            && _tokenName.getPrefix() == null) {
        reportInputProblem(ErrorConsts.ERR_WF_PI_XML_TARGET);
    }

    /* Then verify that we get either white space or the closing "?>":
     * this catches some problems right away, and simplifies the
     * actual processing of contents.
     */
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    c = _inputBuffer[_inputPtr++];

    if (c > INT_SPACE) { // no white space: only "?>" is acceptable
        if (c != INT_QMARK) {
            reportMissingPISpace(c);
        }
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c != '>') {
            reportMissingPISpace(c);
        }
        // Instruction without data: nothing more to read
        _textBuilder.resetWithEmpty();
        _tokenIncomplete = false;
        return PROCESSING_INSTRUCTION;
    }

    // Otherwise skip the white space before the data
    for (;;) {
        switch (c) {
        case '\n':
            markLF();
            break;
        case '\r':
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            if (_inputBuffer[_inputPtr] == '\n') {
                ++_inputPtr;
            }
            markLF();
            break;
        case ' ':
        case '\t':
            break;
        default:
            throwInvalidSpace(c);
        }
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        // Only peeking here: the first non-space char is not consumed
        c = _inputBuffer[_inputPtr];
        if (c > 0x0020) {
            break;
        }
        ++_inputPtr;
    }

    if (_cfgLazyParsing) {
        _tokenIncomplete = true;
    } else {
        finishPI();
    }
    return PROCESSING_INSTRUCTION;
}
/**
 * Parses a numeric (decimal or hexadecimal) character reference.
 *
 * @return Code point for the entity that expands to a valid XML
 *   content character.
 */
protected final int handleCharEntity()
    throws XMLStreamException
{
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char c = _inputBuffer[_inputPtr++];
    int value = 0;

    if (c != 'x') { // decimal; current char is the first digit (or ';')
        while (c != ';') {
            if (c < '0' || c > '9') {
                throwUnexpectedChar(c, "; expected a decimal number");
            } else {
                value = (value * 10) + (c - '0');
                if (value > MAX_UNICODE_CHAR) { // Overflow?
                    reportEntityOverflow();
                }
            }
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
        }
    } else { // hex; digits start after the 'x'
        for (;;) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == ';') {
                break;
            }
            value <<= 4;
            if (c >= '0' && c <= '9') {
                value += (c - '0');
            } else if (c >= 'a' && c <= 'f') {
                value += 10 + (c - 'a');
            } else if (c >= 'A' && c <= 'F') {
                value += 10 + (c - 'A');
            } else {
                throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F)");
            }
            if (value > MAX_UNICODE_CHAR) { // Overflow?
                reportEntityOverflow();
            }
        }
    }

    // Then need to check that the result is a valid XML content char
    // (note: overflow was checked for already)
    if (value < 32) {
        // XML 1.1 allows most control chars (but never null); 1.0 only these three
        final boolean basicWs = (value == INT_LF || value == INT_CR || value == INT_TAB);
        if (!basicWs && (!_xml11 || value == 0)) {
            reportInvalidXmlChar(value);
        }
    } else if (value >= 0xD800) {
        if (value < 0xE000) { // no surrogates via entity expansion
            reportInvalidXmlChar(value);
        }
        if (value == 0xFFFE || value == 0xFFFF) {
            reportInvalidXmlChar(value);
        }
    }
    return value;
}
/**
 * Parses a start tag, after the opening '&lt;' and the first character
 * of the element name have been read. Resolves the element name, pushes
 * a new element scope, then loops over attributes and namespace
 * declarations until the tag ends with '&gt;' or "/&gt;". Finally the
 * attribute collection is finished, depth is increased, and prefixes
 * that remain unbound are reported.
 *
 * @param c First character of the element name
 *
 * @return START_ELEMENT
 *
 * @throws XMLStreamException for malformed content or read problems
 */
protected final int handleStartElement(char c)
throws XMLStreamException
{
_currToken = START_ELEMENT;
_currNsCount = 0;
PName elemName = parsePName(c);
/* Ok. Need to create a qualified name. Simplest for element
* in default ns (no extra work -- expressed as null binding);
* otherwise need to find binding
*/
String prefix = elemName.getPrefix();
boolean allBound; // flag to check 'late' bindings
if (prefix == null) { // element in default ns
allBound = true; // which need not be bound
} else {
elemName = bindName(elemName, prefix);
allBound = elemName.isBound();
}
_tokenName = elemName;
_currElem = new ElementScope(elemName, _currElem);
// And then attribute parsing loop:
int attrPtr = 0;
while (true) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr++];
// Intervening space to skip?
if (c <= INT_SPACE) {
do {
if (c == INT_LF) {
markLF();
} else if (c == INT_CR) {
// CR followed by LF is handled as a single linefeed
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
markLF();
} else if (c != ' ' && c != '\t') {
throwInvalidSpace(c);
}
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr++];
} while (c <= INT_SPACE);
} else if (c != INT_SLASH && c != INT_GT) {
// Without preceding white space, only the end of the tag is acceptable
throwUnexpectedChar(c, " expected space, or '>' or \"/>\"");
}
// Ok; either need to get an attribute name, or end marker:
if (c == INT_SLASH) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr++];
if (c != '>') {
throwUnexpectedChar(c, " expected '>'");
}
_isEmptyTag = true;
break;
} else if (c == '>') {
_isEmptyTag = false;
break;
} else if (c == '<') {
reportInputProblem("Unexpected '<' character in element (missing closing '>'?)");
}
// Ok, an attr name:
PName attrName = parsePName(c);
prefix = attrName.getPrefix();
boolean isNsDecl;
// NOTE(review): the two "xmlns" checks below compare String identity,
// not contents; presumably names coming from parsePName() are
// interned -- confirm
if (prefix == null) { // can be default ns decl:
isNsDecl = (attrName.getLocalName() == "xmlns");
} else {
// May be a namespace decl though?
if (prefix == "xmlns") {
isNsDecl = true;
} else {
attrName = bindName(attrName, prefix);
if (allBound) {
allBound = attrName.isBound();
}
isNsDecl = false;
}
}
// Optional space to skip again
while (true) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr++];
if (c > INT_SPACE) {
break;
}
if (c == '\n') {
markLF();
} else if (c == '\r') {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
markLF();
} else if (c != ' ' && c != '\t') {
throwInvalidSpace(c);
}
}
if (c != '=') {
throwUnexpectedChar(c, " expected '='");
}
// Optional space to skip again
while (true) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr++];
if (c > INT_SPACE) {
break;
}
if (c == '\n') {
markLF();
} else if (c == '\r') {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
markLF();
} else if (c != ' ' && c != '\t') {
throwInvalidSpace(c);
}
}
// At this point c is passed on as the quote char that ends the value
if (c != '"' && c != '\'') {
throwUnexpectedChar(c, " Expected a quote");
}
/* Ok, finally: value parsing. However, ns URIs are to be handled
* different from attribute values... let's offline URIs, since
* they should be less common than attribute values.
*/
if (isNsDecl) { // default ns, or explicit?
handleNsDeclaration(attrName, c);
++_currNsCount;
} else { // nope, a 'real' attribute:
attrPtr = collectValue(attrPtr, c, attrName);
}
}
{
// Note: this call also checks attribute uniqueness
int act = _attrCollector.finishLastValue(attrPtr);
if (act < 0) { // error, dup attr indicated by -1
act = _attrCollector.getCount(); // let's get correct count
reportInputProblem(_attrCollector.getErrorMsg());
}
_attrCount = act;
}
++_depth;
/* Was there any prefix that wasn't bound prior to use?
* That's legal, assuming declaration was found later on...
* let's check
*/
if (!allBound) {
if (!elemName.isBound()) { // element itself unbound
reportUnboundPrefix(_tokenName, false);
}
for (int i = 0, len = _attrCount; i < len; ++i) {
PName attrName = _attrCollector.getName(i);
if (!attrName.isBound()) {
reportUnboundPrefix(attrName, true);
}
}
}
return START_ELEMENT;
}
/**
 * This method implements the tight loop for parsing attribute
 * values. It is off-lined from the main start element method to
 * simplify that method, which makes the code more maintainable
 * and possibly easier for JIT/HotSpot to optimize.
 *
 * @param attrPtr Offset in the attribute value buffer at which the
 *   new value starts
 * @param quoteChar Quote character that ends the value
 * @param attrName Name of the attribute the value belongs to
 *
 * @return Offset in the attribute value buffer after the last
 *   character of the value
 */
private final int collectValue(int attrPtr, char quoteChar, PName attrName)
throws XMLStreamException
{
char[] attrBuffer = _attrCollector.startNewValue(attrName, attrPtr);
final int[] TYPES = sCharTypes.ATTR_CHARS;
value_loop:
while (true) {
char c;
// Inner loop copies chars that need no special handling, and exits
// (with 'c' set) when it finds one that does
ascii_loop:
while (true) {
int ptr = _inputPtr;
if (ptr >= _inputEnd) {
loadMoreGuaranteed();
ptr = _inputPtr;
}
if (attrPtr >= attrBuffer.length) {
attrBuffer = _attrCollector.valueBufferFull();
}
// Limit the run to what is available in input AND fits in the output buffer
int max = _inputEnd;
{
int max2 = ptr + (attrBuffer.length - attrPtr);
if (max2 < max) {
max = max2;
}
}
while (ptr < max) {
c = _inputBuffer[ptr++];
if (c <= 0xFF) {
// Type table is indexed by chars up to 0xFF only
if (TYPES[c] != 0) {
_inputPtr = ptr;
break ascii_loop;
}
} else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
_inputPtr = ptr;
break ascii_loop;
}
attrBuffer[attrPtr++] = c;
}
_inputPtr = ptr;
}
if (c <= 0xFF) {
switch (TYPES[c]) {
case XmlCharTypes.CT_INVALID:
c = handleInvalidXmlChar(c);
// NOTE(review): no break here, so if handleInvalidXmlChar() returns
// normally, processing continues with the CR handling below -- confirm
// that this is intended
case XmlCharTypes.CT_WS_CR:
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
// fall through
case XmlCharTypes.CT_WS_LF:
markLF();
// fall through
case XmlCharTypes.CT_WS_TAB:
// Plus, need to convert these all to simple space
c = ' ';
break;
case XmlCharTypes.CT_LT:
throwUnexpectedChar(c, "'<' not allowed in attribute value");
// (no break: presumably throwUnexpectedChar() always throws -- confirm)
case XmlCharTypes.CT_AMP:
{
int d = handleEntityInText(false);
if (d == 0) { // unexpanded general entity... not good
reportUnexpandedEntityInAttr(attrName, false);
}
// Ok; does it need a surrogate though? (over 16 bits)
if ((d >> 16) != 0) {
d -= 0x10000;
// First surrogate uses the slot that is known to be free; room for
// the second one is then ensured separately
attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10));
d = 0xDC00 | (d & 0x3FF);
if (attrPtr >= attrBuffer.length) {
attrBuffer = _attrCollector.valueBufferFull();
}
}
c = (char) d;
}
break;
case XmlCharTypes.CT_ATTR_QUOTE:
// Only the quote char that started the value ends it; the other
// quote char is added to the value like any other char
if (c == quoteChar) {
break value_loop;
}
// default:
// Other chars are not important here...
}
} else if (c >= 0xD800) {
if (c < 0xE000) {
// if ok, returns second surrogate; otherwise exception
char d = checkSurrogate(c);
attrBuffer[attrPtr++] = c;
// Need to ensure room for one more
if (attrPtr >= attrBuffer.length) {
attrBuffer = _attrCollector.valueBufferFull();
}
c = d;
} else if (c >= 0xFFFE) {
c = handleInvalidXmlChar(c);
}
}
// We know there's room for at least one more char
attrBuffer[attrPtr++] = c;
}
return attrPtr;
}
/**
 * Method called from the main START_ELEMENT handling loop, to
 * parse namespace URI values.
 *
 * @param name Name of the namespace declaration attribute
 * @param quoteChar Quote character that ends the URI value
 */
private void handleNsDeclaration(PName name, char quoteChar)
throws XMLStreamException
{
int attrPtr = 0;
// URI is collected in the shared name buffer, which is grown as necessary
char[] attrBuffer = _nameBuffer;
while (true) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
char c = _inputBuffer[_inputPtr++];
if (c == quoteChar) {
break;
}
if (c == '&') { // entity
int d = handleEntityInText(false);
if (d == 0) { // general entity; should never happen
reportUnexpandedEntityInAttr(name, true);
}
// Ok; does it need a surrogate though? (over 16 bits)
if ((d >> 16) != 0) {
if (attrPtr >= attrBuffer.length) {
_nameBuffer = attrBuffer = DataUtil.growArrayBy(attrBuffer, attrBuffer.length);
}
d -= 0x10000;
// First surrogate is added here; the second one is added by the
// shared code at the end of the loop
attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10));
d = 0xDC00 | (d & 0x3FF);
}
c = (char) d;
} else if (c == '<') { // error
throwUnexpectedChar(c, "'<' not allowed in attribute value");
} else {
if (c < INT_SPACE) {
if (c == '\n') {
markLF();
} else if (c == '\r') {
// CR and CR+LF are both replaced with a single LF
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
markLF();
c = '\n';
} else if (c != '\t') {
throwInvalidSpace(c);
}
}
}
if (attrPtr >= attrBuffer.length) {
_nameBuffer = attrBuffer = DataUtil.growArrayBy(attrBuffer, attrBuffer.length);
}
attrBuffer[attrPtr++] = c;
}
/* Simple optimization: for default ns removal (or, with
* ns 1.1, any other as well), will use empty value... no
* need to try to intern:
*/
if (attrPtr == 0) {
bindNs(name, "");
} else {
String uri = _config.canonicalizeURI(attrBuffer, attrPtr);
bindNs(name, uri);
}
}
/**
 * Handles an end tag, after "&lt;/" has been read: decreases depth,
 * verifies that the name in the input matches the prefixed name of the
 * current element, and consumes optional white space and the
 * closing '&gt;'.
 *
 * @return END_ELEMENT
 *
 * @throws XMLStreamException if the name does not match, the tag is
 *   malformed, or for read problems
 */
protected final int handleEndElement()
    throws XMLStreamException
{
    --_depth;
    _currToken = END_ELEMENT;

    // The name that has to follow is that of the currently open element
    _tokenName = _currElem.getName();
    final String expName = _tokenName.getPrefixedName();
    final int nameLen = expName.length();

    int ix = 0;
    char c;
    do {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c != expName.charAt(ix)) {
            reportUnexpectedEndTag(expName);
        }
        ++ix;
    } while (ix < nameLen);

    // Can still have a problem, if the name in input did not end there
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    c = _inputBuffer[_inputPtr++];
    if (c <= ' ') {
        c = skipInternalWs(false, null);
    } else if (c != '>' && (c == ':' || XmlChars.is10NameChar(c))) {
        // Name in input is longer than the expected one
        reportUnexpectedEndTag(expName);
    }
    if (c != '>') {
        throwUnexpectedChar(c, " expected space or closing '>'");
    }
    return END_ELEMENT;
}
/**
 * Handles an entity reference, after the leading '&amp;' has been read.
 * Character references and the five pre-defined entities (amp, apos,
 * lt, gt, quot) are resolved to the character they stand for. For any
 * other name, the name is parsed and stored as the current token name,
 * and zero is returned to indicate an unexpanded general entity.
 *
 * @param inAttr True if the reference is within an attribute value, in
 *   which case a general entity reference is reported as a problem
 *
 * @return Code point of the character the reference expands to; or
 *   zero for a general entity reference
 *
 * @throws XMLStreamException for malformed references or read problems
 */
protected final int handleEntityInText(boolean inAttr)
    throws XMLStreamException
{
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char c = _inputBuffer[_inputPtr++];
    if (c == '#') {
        return handleCharEntity();
    }

    /* Pre-defined entities are matched one char at a time; if matching
     * fails, 'start' holds the chars matched so far, and 'c' the first
     * char that did not match.
     */
    String start;
    if (c == 'a') { // amp or apos?
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c == 'm') { // amp?
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == 'p') {
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                c = _inputBuffer[_inputPtr++];
                if (c == ';') {
                    return INT_AMP;
                }
                start = "amp";
            } else {
                start = "am";
            }
        } else if (c == 'p') { // apos?
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == 'o') {
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                c = _inputBuffer[_inputPtr++];
                if (c == 's') {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    c = _inputBuffer[_inputPtr++];
                    if (c == ';') {
                        return INT_APOS;
                    }
                    start = "apos";
                } else {
                    start = "apo";
                }
            } else {
                start = "ap";
            }
        } else {
            start = "a";
        }
    } else if (c == 'l') { // lt?
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c == 't') {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == ';') {
                return INT_LT;
            }
            start = "lt";
        } else {
            start = "l";
        }
    } else if (c == 'g') { // gt?
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c == 't') {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == ';') {
                return INT_GT;
            }
            start = "gt";
        } else {
            start = "g";
        }
    } else if (c == 'q') { // quot?
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
        if (c == 'u') {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            c = _inputBuffer[_inputPtr++];
            if (c == 'o') {
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                c = _inputBuffer[_inputPtr++];
                if (c == 't') {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    c = _inputBuffer[_inputPtr++];
                    if (c == ';') {
                        return INT_QUOTE;
                    }
                    start = "quot";
                } else {
                    start = "quo";
                }
            } else {
                start = "qu";
            }
        } else {
            start = "q";
        }
    } else {
        start = "";
    }

    final int[] TYPES = sCharTypes.NAME_CHARS;

    /* All righty: we have the beginning of the name, plus the first
     * char after it. So let's see what we can do with it.
     */
    char[] cbuf = _nameBuffer;
    int cix = 0;
    for (int len = start.length(); cix < len; ++cix) {
        cbuf[cix] = start.charAt(cix);
    }

    while (c != ';') {
        boolean ok;

        // Has to be a valid name char (name start char, if first):
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_NAME_NONE:
            case XmlCharTypes.CT_NAME_COLON: // not ok for entities?
            case XmlCharTypes.CT_NAME_NONFIRST:
                ok = (cix > 0);
                break;
            case XmlCharTypes.CT_NAME_ANY:
                ok = true;
                break;
            default:
                ok = false;
                break;
            }
        } else if (c >= 0xD800 && c < 0xE000) {
            /* Only actual surrogates are decoded as a pair. Other chars
             * above 0xFF must not get here: they would otherwise be
             * passed to the surrogate decoder, and added to the name
             * twice.
             */
            // Position has to be checked before the first surrogate is added
            final boolean first = (cix == 0);
            int value = decodeSurrogate(c);
            if (cix >= cbuf.length) {
                _nameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
            }
            cbuf[cix++] = c;
            // Second surrogate gets added by the shared code below
            c = _inputBuffer[_inputPtr-1]; // was read by decode func
            ok = first ? XmlChars.is10NameStartChar(value)
                : XmlChars.is10NameChar(value);
        } else if (c >= 0xFFFE) {
            c = handleInvalidXmlChar(c);
            ok = false; // never gets here
        } else {
            // Other chars above 0xFF are accepted without further checks
            ok = true;
        }
        if (!ok) {
            reportInvalidNameChar(c, cix);
        }
        if (cix >= cbuf.length) {
            _nameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
        }
        cbuf[cix++] = c;
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c = _inputBuffer[_inputPtr++];
    }

    // Ok, let's construct a (temporary) entity name, then:
    String pname = new String(cbuf, 0, cix);
    // (note: hash is dummy... not to be compared to anything etc)
    _tokenName = new PNameC(pname, null, pname, 0);

    /* One more thing: do we actually allow entities in this mode
     * and with this event?
     */
    if (_config.willExpandEntities()) {
        reportInputProblem("General entity reference (&"+pname+";) encountered in entity expanding mode: operation not (yet) implemented");
    }
    if (inAttr) {
        reportInputProblem("General entity reference (&"+pname+";) encountered in attribute value, in non-entity-expanding mode: no way to handle it");
    }
    return 0;
}
/**
 * Reads the contents of a comment into the text buffer, up to and
 * including the closing "--&gt;". CR and CR+LF are replaced with a
 * single LF, and surrogate pairs are checked.
 *
 * @throws XMLStreamException for invalid content (such as "--" that is
 *   not followed by '&gt;') or read problems
 */
@Override
protected final void finishComment() throws XMLStreamException
{
final int[] TYPES = sCharTypes.OTHER_CHARS;
// NOTE(review): this local is used for some reads and the field for
// others; presumably loadMoreGuaranteed() never replaces the buffer
// array -- confirm
final char[] inputBuffer = _inputBuffer;
char[] outputBuffer = _textBuilder.resetWithEmpty();
int outPtr = 0;
main_loop:
while (true) {
char c;
// Then the tight ascii non-funny-char loop:
ascii_loop:
while (true) {
int ptr = _inputPtr;
if (ptr >= _inputEnd) {
loadMoreGuaranteed();
ptr = _inputPtr;
}
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
// Limit the run to what is available in input AND fits in the output buffer
int max = _inputEnd;
{
int max2 = ptr + (outputBuffer.length - outPtr);
if (max2 < max) {
max = max2;
}
}
while (ptr < max) {
c = inputBuffer[ptr++];
if (c <= 0xFF) {
if (TYPES[c] != 0) {
_inputPtr = ptr;
break ascii_loop;
}
} else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
_inputPtr = ptr;
break ascii_loop;
}
outputBuffer[outPtr++] = c;
}
_inputPtr = ptr;
}
if (c <= 0xFF) {
switch (TYPES[c]) {
case XmlCharTypes.CT_INVALID:
c = handleInvalidXmlChar(c);
// NOTE(review): no break here, so if handleInvalidXmlChar() returns
// normally, processing continues with the CR handling below -- confirm
// that this is intended
case XmlCharTypes.CT_WS_CR:
{
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
markLF();
}
c = '\n';
break;
case XmlCharTypes.CT_WS_LF:
markLF();
break;
case XmlCharTypes.CT_HYPHEN: // '-->'?
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '-') { // ok, must be end then
++_inputPtr;
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr++] != '>') {
reportDoubleHyphenInComments();
}
break main_loop;
}
// A single hyphen is regular content
break;
// default:
// Other types are not important here..
}
} else if (c >= 0xD800) { // high-range, surrogates etc
if (c < 0xE000) {
// if ok, returns second surrogate; otherwise exception
char d = checkSurrogate(c);
outputBuffer[outPtr++] = c;
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
c = d;
} else if (c >= 0xFFFE) {
c = handleInvalidXmlChar(c);
}
}
// We know there's room for one more:
outputBuffer[outPtr++] = c;
}
_textBuilder.setCurrentLength(outPtr);
}
/**
 * Reads the data part of a processing instruction into the text
 * buffer, up to and including the closing "?&gt;". CR and CR+LF are
 * replaced with a single LF, and surrogate pairs are checked.
 *
 * @throws XMLStreamException for invalid content or read problems
 */
@Override
protected final void finishPI() throws XMLStreamException
{
final int[] TYPES = sCharTypes.OTHER_CHARS;
// NOTE(review): this local is used for some reads and the field for
// others; presumably loadMoreGuaranteed() never replaces the buffer
// array -- confirm
final char[] inputBuffer = _inputBuffer;
char[] outputBuffer = _textBuilder.resetWithEmpty();
int outPtr = 0;
main_loop:
while (true) {
char c;
// Then the tight ascii non-funny-char loop:
ascii_loop:
while (true) {
int ptr = _inputPtr;
if (ptr >= _inputEnd) {
loadMoreGuaranteed();
ptr = _inputPtr;
}
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
// Limit the run to what is available in input AND fits in the output buffer
int max = _inputEnd;
{
int max2 = ptr + (outputBuffer.length - outPtr);
if (max2 < max) {
max = max2;
}
}
while (ptr < max) {
c = inputBuffer[ptr++];
if (c <= 0xFF) {
if (TYPES[c] != 0) {
_inputPtr = ptr;
break ascii_loop;
}
} else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
_inputPtr = ptr;
break ascii_loop;
}
outputBuffer[outPtr++] = c;
}
_inputPtr = ptr;
}
if (c <= 0xFF) {
// NOTE(review): unlike finishComment(), there is no CT_INVALID case
// in this switch -- confirm whether that is intended
switch (TYPES[c]) {
case XmlCharTypes.CT_WS_CR:
{
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (inputBuffer[_inputPtr] == CHAR_LF) {
++_inputPtr;
}
markLF();
c = '\n';
}
break;
case XmlCharTypes.CT_WS_LF:
markLF();
break;
case XmlCharTypes.CT_QMARK: // '?>'?
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
if (_inputBuffer[_inputPtr] == '>') {
++_inputPtr;
break main_loop;
}
// A question mark that is not followed by '>' is regular content
break;
// default:
// Other types are not important here...
}
} else if (c >= 0xD800) { // high-range, surrogates etc
if (c < 0xE000) {
// if ok, returns second surrogate; otherwise exception
char d = checkSurrogate(c);
outputBuffer[outPtr++] = c;
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
c = d;
} else if (c >= 0xFFFE) {
c = handleInvalidXmlChar(c);
}
}
// We know there's room for one more:
outputBuffer[outPtr++] = c;
}
_textBuilder.setCurrentLength(outPtr);
}
/**
 * Scans through the internal DTD subset up to the closing ']' (found
 * outside of any declaration and quoted string), then requires the
 * '&gt;' that ends the DOCTYPE declaration, optionally preceded by
 * white space.
 *
 * @param copyContents If true, the subset text is collected into the
 *   text builder; if false, it is only scanned and validated
 *
 * @throws XMLStreamException propagated from the loading and validation
 *   helpers, or if the subset is not followed by '&gt;'
 */
@Override
protected final void finishDTD(boolean copyContents) throws XMLStreamException
{
    char[] outputBuffer = copyContents ?
        _textBuilder.resetWithEmpty() : null;
    int outPtr = 0;

    final int[] TYPES = sCharTypes.DTD_CHARS;
    boolean inDecl = false; // in declaration/directive?
    int quoteChar = 0; // inside quoted string?

    main_loop:
    while (true) {
        char c;

        /* First we'll have a quickie loop for speeding through
         * uneventful chars...
         */
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            int max = _inputEnd;
            if (outputBuffer != null) {
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                {
                    int max2 = ptr + (outputBuffer.length - outPtr);
                    if (max2 < max) {
                        max = max2;
                    }
                }
            }
            while (ptr < max) {
                c = _inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                if (outputBuffer != null) {
                    outputBuffer[outPtr++] = c;
                }
            }
            _inputPtr = ptr;
        }

        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would replace
                // the returned character with '\n' and count a line
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (_inputBuffer[_inputPtr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_DTD_QUOTE: // apos or quot
                if (quoteChar == 0) {
                    quoteChar = c;
                } else {
                    if (quoteChar == c) {
                        quoteChar = 0;
                    }
                }
                break;
            case XmlCharTypes.CT_DTD_LT:
                if (!inDecl) {
                    inDecl = true;
                }
                break;
            case XmlCharTypes.CT_DTD_GT:
                if (quoteChar == 0) {
                    inDecl = false;
                }
                break;
            case XmlCharTypes.CT_DTD_RBRACKET:
                // Only ends the subset when not nested in anything
                if (!inDecl && quoteChar == 0) {
                    break main_loop;
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                if (outputBuffer != null) {
                    outputBuffer[outPtr++] = c;
                    if (outPtr >= outputBuffer.length) {
                        outputBuffer = _textBuilder.finishCurrentSegment();
                        outPtr = 0;
                    }
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        if (outputBuffer != null) { // has room for one more
            outputBuffer[outPtr++] = c;
        }
    }
    if (outputBuffer != null) {
        _textBuilder.setCurrentLength(outPtr);
    }

    // but still need to match the '>'...
    char c = skipInternalWs(false, null);
    if (c != '>') {
        throwUnexpectedChar(c, " expected '>' after the internal subset");
    }
}
/**
 * Reads the contents of a CDATA section into the text builder, up to
 * the closing "]]&gt;" marker, which is consumed but not stored.
 * "\r" and "\r\n" are normalized to "\n". If coalescing is enabled and
 * no entity is pending, any directly following text segments are then
 * appended via {@code finishCoalescedText()}.
 *
 * @throws XMLStreamException propagated from the loading and
 *   character-validation helpers
 */
@Override
protected final void finishCData() throws XMLStreamException
{
    final int[] TYPES = sCharTypes.OTHER_CHARS;
    final char[] inputBuffer = _inputBuffer;
    char[] outputBuffer = _textBuilder.resetWithEmpty();
    int outPtr = 0;

    /* At this point, space (if any) has been skipped, and we are
     * to parse and store the contents
     */
    main_loop:
    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            if (outPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outPtr = 0;
            }
            int max = _inputEnd;
            {
                int max2 = ptr + (outputBuffer.length - outPtr);
                if (max2 < max) {
                    max = max2;
                }
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                outputBuffer[outPtr++] = c;
            }
            _inputPtr = ptr;
        }
        // And then exceptions:
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would replace
                // the returned character with '\n' and count a line
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (inputBuffer[_inputPtr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_RBRACKET: // close ']]>' marker?
                /* Ok: let's just parse all consecutive right brackets,
                 * and see if followed by greater-than char. This because
                 * we can only push back at most one char at a time, and
                 * thus can't easily just check a subset
                 */
                int count = 0; // ignore first bracket
                char d;

                do {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    d = _inputBuffer[_inputPtr];
                    if (d != ']') {
                        break;
                    }
                    ++_inputPtr;
                    ++count;
                } while (true);

                // Was the marker found?
                boolean ok = (d == '>' && count >= 1);
                if (ok) {
                    // last extra bracket belongs to the marker itself
                    --count;
                }
                // Brackets to copy to output?
                for (; count > 0; --count) {
                    outputBuffer[outPtr++] = ']';
                    if (outPtr >= outputBuffer.length) {
                        outputBuffer = _textBuilder.finishCurrentSegment();
                        outPtr = 0;
                    }
                }
                if (ok) {
                    ++_inputPtr; // to consume '>'
                    break main_loop;
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                outputBuffer[outPtr++] = c;
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        // Ok, can output the char; there's room for one char at least
        outputBuffer[outPtr++] = c;
    }
    _textBuilder.setCurrentLength(outPtr);
    /* 03-Feb-2009, tatu: To support coalescing mode, may need to
     *   do some extra work
     */
    if (_cfgCoalescing && !_entityPending) {
        finishCoalescedText();
    }
}
/**
 * Reads a character data segment into the text builder. The first
 * character (or expanded character entity, passed as a negated code
 * point) is taken from {@code mTmpChar}; reading stops before the next
 * '&lt;' or when an unexpandable general entity is encountered (in
 * which case {@code _entityPending} is set). "\r" and "\r\n" are
 * normalized to "\n", character entities are expanded, and "]]&gt;" in
 * content is reported as an error. If coalescing is enabled and no
 * entity is pending, following text segments are appended via
 * {@code finishCoalescedText()}.
 *
 * @throws XMLStreamException propagated from the loading, entity and
 *   character-validation helpers
 */
@Override
protected final void finishCharacters() throws XMLStreamException
{
    int outPtr;
    char[] outputBuffer;

    // Ok, so what was the first char / entity?
    {
        int c = mTmpChar;
        if (c < 0) { // from entity; can just copy as is
            c = -c;
            outputBuffer = _textBuilder.resetWithEmpty();
            outPtr = 0;
            if ((c >> 16) != 0) { // surrogate pair?
                c -= 0x10000;
                /* Note: after resetting the buffer, it's known to have
                 * space for more than 2 chars we need to add
                 */
                outputBuffer[outPtr++] = (char) (0xD800 | (c >> 10));
                c = 0xDC00 | (c & 0x3FF);
            }
            outputBuffer[outPtr++] = (char) c;
        } else { // white space that we are interested in?
            if (c == INT_CR || c == INT_LF) {
                ++_inputPtr; // wasn't advanced yet, in this case
                outPtr = checkInTreeIndentation((char) c);
                if (outPtr < 0) {
                    return;
                }
                // Above call also initializes the text builder appropriately
                outputBuffer = _textBuilder.getBufferWithoutReset();
            } else {
                outputBuffer = _textBuilder.resetWithEmpty();
                outPtr = 0;
            }
        }
    }

    final int[] TYPES = sCharTypes.TEXT_CHARS;
    final char[] inputBuffer = _inputBuffer;

    main_loop:
    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            if (outPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outPtr = 0;
            }
            int max = _inputEnd;
            {
                int max2 = ptr + (outputBuffer.length - outPtr);
                if (max2 < max) {
                    max = max2;
                }
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                outputBuffer[outPtr++] = c;
            }
            _inputPtr = ptr;
        }
        // And then exceptions:
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would replace
                // the returned character with '\n' and count a line
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    int ptr = _inputPtr;
                    if (ptr >= _inputEnd) {
                        loadMoreGuaranteed();
                        ptr = _inputPtr;
                    }
                    if (inputBuffer[ptr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_LT:
                // Leave the '<' for the next event
                --_inputPtr;
                break main_loop;
            case XmlCharTypes.CT_AMP:
                {
                    int d = handleEntityInText(false);
                    if (d == 0) { // unexpandable general parsed entity
                        // _inputPtr set by entity expansion method
                        _entityPending = true;
                        break main_loop;
                    }
                    // Ok; does it need a surrogate though? (over 16 bits)
                    if ((d >> 16) != 0) {
                        d -= 0x10000;
                        outputBuffer[outPtr++] = (char) (0xD800 | (d >> 10));
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        d = (0xDC00 | (d & 0x3FF));
                    }
                    c = (char) d;
                }
                break;
            case XmlCharTypes.CT_RBRACKET: // ']]>'?
                {
                    // Let's then just count number of brackets --
                    // in case they are not followed by '>'
                    int count = 1;
                    while (true) {
                        if (_inputPtr >= _inputEnd) {
                            loadMoreGuaranteed();
                        }
                        c = inputBuffer[_inputPtr];
                        if (c != ']') {
                            break;
                        }
                        ++_inputPtr; // to skip past bracket
                        ++count;
                    }
                    if (c == '>' && count > 1) {
                        reportIllegalCDataEnd();
                    }
                    // Nope. Need to output all brackets, then; except
                    // for one that can be left for normal output
                    while (count > 1) {
                        outputBuffer[outPtr++] = ']';
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        --count;
                    }
                }
                // Can just output the first ']' along normal output
                c = ']';
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                outputBuffer[outPtr++] = c;
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        outputBuffer[outPtr++] = c;
    }
    _textBuilder.setCurrentLength(outPtr);

    // 03-Feb-2009, tatu: Need to support coalescing mode too:
    if (_cfgCoalescing && !_entityPending) {
        finishCoalescedText();
    }
}
/**
 * Reads a run of white space into the text builder, starting with the
 * character held in {@code mTmpChar}. "\r" and "\r\n" are stored as
 * "\n". Reading stops at the first character above space, or when no
 * more input can be loaded; any other character at or below space
 * (besides space, tab, CR and LF) is passed to
 * {@code throwInvalidSpace()}.
 * If the leading character is a line feed, the run is first offered to
 * {@code checkPrologIndentation()}, which may handle it completely.
 */
@Override
protected final void finishSpace() throws XMLStreamException
{
/* Ok: so, mTmpChar contains first space char. If it looks
* like indentation, we can probably optimize a bit...
*/
char tmp = (char)mTmpChar;
char[] outputBuffer;
int outPtr;
if (tmp == '\r' || tmp == '\n') {
outPtr = checkPrologIndentation(tmp);
// negative value: indentation was fully handled, nothing more to read
if (outPtr < 0) {
return;
}
// Above call also initializes the text builder appropriately
outputBuffer = _textBuilder.getBufferWithoutReset();
} else {
outputBuffer = _textBuilder.resetWithEmpty();
outputBuffer[0] = tmp;
outPtr = 1;
}
// Local copy of the input pointer; written back to _inputPtr at the end
int ptr = _inputPtr;
while (true) {
if (ptr >= _inputEnd) {
// NOTE(review): if loadMore() fails, ptr still holds the pre-load
// offset that is stored into _inputPtr below -- confirm intended
if (!loadMore()) {
break;
}
ptr = _inputPtr;
}
char c = _inputBuffer[ptr];
if (c > INT_SPACE) {
// not white space: leave it unconsumed
break;
}
++ptr;
if (c == INT_LF) {
markLF(ptr);
} else if (c == INT_CR) {
if (ptr >= _inputEnd) {
if (!loadMore()) { // still need to output the lf
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
outputBuffer[outPtr++] = '\n';
break;
}
ptr = _inputPtr;
}
// "\r\n" counts as a single line feed
if (_inputBuffer[ptr] == '\n') {
++ptr;
}
markLF(ptr);
c = '\n'; // need to convert to canonical lf
} else if (c != ' ' && c != '\t') {
// sync the field first, before reporting the problem
_inputPtr = ptr;
throwInvalidSpace(c);
}
// Ok, can output the char
if (outPtr >= outputBuffer.length) {
outputBuffer = _textBuilder.finishCurrentSegment();
outPtr = 0;
}
outputBuffer[outPtr++] = c;
}
_inputPtr = ptr;
_textBuilder.setCurrentLength(outPtr);
}
/*
/**********************************************************************
/* 2nd level parsing for coalesced text
/**********************************************************************
*/
Method that gets called after a primary text segment (of type
CHARACTERS or CDATA, not applicable to SPACE) has been read in
text buffer. Method has to see if the following event would
be textual as well, and if so, read it (and any other following
textual segments).
/**
 * Invoked once a primary text segment (CHARACTERS or CDATA; never
 * SPACE) is in the text buffer: as long as the input continues with
 * more character data or another CDATA section, that content is
 * appended to the same buffer. Returns when something non-textual
 * follows, when input runs out, or when an unexpandable entity is
 * pending.
 */
protected final void finishCoalescedText()
    throws XMLStreamException
{
    for (;;) {
        // Need one character to decide; running out of input here is
        // most likely an error, but it gets reported later on
        if (_inputPtr >= _inputEnd && !loadMore()) {
            return;
        }
        if (_inputBuffer[_inputPtr] != '<') {
            // textual content (or entity, or an error)
            finishCoalescedCharacters();
            if (_entityPending) {
                return;
            }
            continue;
        }
        // Markup of some kind: "<![" is needed for it to be CDATA
        if ((_inputPtr + 3) >= _inputEnd && !loadAndRetain(3)) {
            // probably an error, handled later
            return;
        }
        final boolean cdataLike = (_inputBuffer[_inputPtr+1] == '!')
                && (_inputBuffer[_inputPtr+2] == '[');
        if (!cdataLike) {
            return;
        }
        // Verify the remainder of the CDATA start marker
        _inputPtr += 3;
        for (int i = 0; i < 6; ++i) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            final char actual = _inputBuffer[_inputPtr++];
            final char expected = CDATA_STR.charAt(i);
            if (actual != expected) {
                reportTreeUnexpChar(actual, " (expected '"+expected+"' for CDATA section)");
            }
        }
        finishCoalescedCData();
    }
}
/**
 * Appends the contents of a CDATA section to the text already held by
 * the text builder (coalescing mode), up to the closing "]]&gt;" marker,
 * which is consumed but not stored. Apart from continuing with the
 * existing buffer contents, and not triggering further coalescing
 * itself, the processing matches that of {@code finishCData()}.
 *
 * @throws XMLStreamException propagated from the loading and
 *   character-validation helpers
 */
protected final void finishCoalescedCData()
    throws XMLStreamException
{
    final int[] TYPES = sCharTypes.OTHER_CHARS;
    final char[] inputBuffer = _inputBuffer;

    // Continue from where the previous segment left off
    char[] outputBuffer = _textBuilder.getBufferWithoutReset();
    int outPtr = _textBuilder.getCurrentLength();

    /* At this point, space (if any) has been skipped, and we are
     * to parse and store the contents
     */
    main_loop:
    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            if (outPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outPtr = 0;
            }
            int max = _inputEnd;
            {
                int max2 = ptr + (outputBuffer.length - outPtr);
                if (max2 < max) {
                    max = max2;
                }
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                outputBuffer[outPtr++] = c;
            }
            _inputPtr = ptr;
        }
        // And then exceptions:
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would replace
                // the returned character with '\n' and count a line
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (inputBuffer[_inputPtr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_RBRACKET: // close ']]>' marker?
                /* Ok: let's just parse all consecutive right brackets,
                 * and see if followed by greater-than char. This because
                 * we can only push back at most one char at a time, and
                 * thus can't easily just check a subset
                 */
                int count = 0; // ignore first bracket
                char d;

                do {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    d = _inputBuffer[_inputPtr];
                    if (d != ']') {
                        break;
                    }
                    ++_inputPtr;
                    ++count;
                } while (true);

                // Was the marker found?
                boolean ok = (d == '>' && count >= 1);
                if (ok) {
                    // last extra bracket belongs to the marker itself
                    --count;
                }
                // Brackets to copy to output?
                for (; count > 0; --count) {
                    outputBuffer[outPtr++] = ']';
                    if (outPtr >= outputBuffer.length) {
                        outputBuffer = _textBuilder.finishCurrentSegment();
                        outPtr = 0;
                    }
                }
                if (ok) {
                    ++_inputPtr; // to consume '>'
                    break main_loop;
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                outputBuffer[outPtr++] = c;
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        // Ok, can output the char; there's room for one char at least
        outputBuffer[outPtr++] = c;
    }
    _textBuilder.setCurrentLength(outPtr);
}
/**
 * Appends a character data segment to the text already held by the
 * text builder (coalescing mode). Processing mirrors that of
 * {@code finishCharacters()}, except that there is no pending first
 * character to handle and no further coalescing is triggered from
 * here. Reading stops before the next '&lt;', or when an unexpandable
 * general entity is encountered (in which case {@code _entityPending}
 * is set).
 *
 * @throws XMLStreamException propagated from the loading, entity and
 *   character-validation helpers
 */
protected final void finishCoalescedCharacters()
    throws XMLStreamException
{
    // first char can't be from (char) entity (wrt finishCharacters)

    final int[] TYPES = sCharTypes.TEXT_CHARS;
    final char[] inputBuffer = _inputBuffer;

    // Continue from where the previous segment left off
    char[] outputBuffer = _textBuilder.getBufferWithoutReset();
    int outPtr = _textBuilder.getCurrentLength();

    main_loop:
    while (true) {
        char c;
        ascii_loop:
        while (true) { // tight loop for ascii chars
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            if (outPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outPtr = 0;
            }
            int max = _inputEnd;
            {
                int max2 = ptr + (outputBuffer.length - outPtr);
                if (max2 < max) {
                    max = max2;
                }
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                outputBuffer[outPtr++] = c;
            }
            _inputPtr = ptr;
        }
        // And then exceptions:
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would replace
                // the returned character with '\n' and count a line
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    int ptr = _inputPtr;
                    if (ptr >= _inputEnd) {
                        loadMoreGuaranteed();
                        ptr = _inputPtr;
                    }
                    if (inputBuffer[ptr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_LT:
                // Leave the '<' for the caller to inspect
                --_inputPtr;
                break main_loop;
            case XmlCharTypes.CT_AMP:
                {
                    int d = handleEntityInText(false);
                    if (d == 0) { // unexpandable general parsed entity
                        // _inputPtr set by entity expansion method
                        _entityPending = true;
                        break main_loop;
                    }
                    // Ok; does it need a surrogate though? (over 16 bits)
                    if ((d >> 16) != 0) {
                        d -= 0x10000;
                        outputBuffer[outPtr++] = (char) (0xD800 | (d >> 10));
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        d = (0xDC00 | (d & 0x3FF));
                    }
                    c = (char) d;
                }
                break;
            case XmlCharTypes.CT_RBRACKET: // ']]>'?
                {
                    // Let's then just count number of brackets --
                    // in case they are not followed by '>'
                    int count = 1;
                    while (true) {
                        if (_inputPtr >= _inputEnd) {
                            loadMoreGuaranteed();
                        }
                        c = inputBuffer[_inputPtr];
                        if (c != ']') {
                            break;
                        }
                        ++_inputPtr; // to skip past bracket
                        ++count;
                    }
                    if (c == '>' && count > 1) {
                        reportIllegalCDataEnd();
                    }
                    // Nope. Need to output all brackets, then; except
                    // for one that can be left for normal output
                    while (count > 1) {
                        outputBuffer[outPtr++] = ']';
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        --count;
                    }
                }
                // Can just output the first ']' along normal output
                c = ']';
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                outputBuffer[outPtr++] = c;
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        outputBuffer[outPtr++] = c;
    }
    _textBuilder.setCurrentLength(outPtr);
}
Method that gets called after a primary text segment (of type
CHARACTERS or CDATA, not applicable to SPACE) has been skipped.
Method has to see if the following event would
be textual as well, and if so, skip it (and any other following
textual segments).
Returns: True if we encountered an unexpandable entity
/**
 * Invoked once a primary text segment (CHARACTERS or CDATA; never
 * SPACE) has been skipped: as long as the input continues with more
 * character data or another CDATA section, that content is skipped
 * as well.
 *
 * @return True if skipping stopped at an unexpandable entity; false if
 *   it stopped because non-textual content follows or no more input
 *   could be loaded
 */
@Override
protected final boolean skipCoalescedText()
    throws XMLStreamException
{
    for (;;) {
        // Need one character to decide; running out of input here is
        // most likely an error, but it gets reported later on
        if (_inputPtr >= _inputEnd && !loadMore()) {
            return false;
        }
        if (_inputBuffer[_inputPtr] != '<') {
            // textual content (or entity, or an error)
            if (skipCharacters()) {
                return true;
            }
            continue;
        }
        // Markup of some kind: "<![" is needed for it to be CDATA
        if ((_inputPtr + 3) >= _inputEnd && !loadAndRetain(3)) {
            // probably an error, handled later
            return false;
        }
        final boolean cdataLike = (_inputBuffer[_inputPtr+1] == '!')
                && (_inputBuffer[_inputPtr+2] == '[');
        if (!cdataLike) {
            return false;
        }
        // Verify the remainder of the CDATA start marker
        _inputPtr += 3;
        for (int i = 0; i < 6; ++i) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            final char actual = _inputBuffer[_inputPtr++];
            final char expected = CDATA_STR.charAt(i);
            if (actual != expected) {
                reportTreeUnexpChar(actual, " (expected '"+expected+"' for CDATA section)");
            }
        }
        skipCData();
    }
}
/*
/**********************************************************************
/* 2nd level parsing for skipping content
/**********************************************************************
*/
/**
 * Skips the contents of a comment, up to and including the closing
 * "--&gt;" marker. Line feeds are still tracked, and characters are
 * still validated: a "--" not followed by '&gt;' is reported, invalid
 * characters are passed to {@code handleInvalidXmlChar()}, and
 * surrogates are verified with {@code checkSurrogate()}, the same way
 * the other skip methods do.
 *
 * @throws XMLStreamException propagated from the loading and
 *   character-validation helpers
 */
@Override
protected final void skipComment()
    throws XMLStreamException
{
    final int[] TYPES = sCharTypes.OTHER_CHARS;
    final char[] inputBuffer = _inputBuffer;

    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            int max = _inputEnd;
            if (ptr >= max) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
                max = _inputEnd;
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
            }
            _inputPtr = ptr;
        }
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would skip
                // a following LF and count a line that is not there
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (inputBuffer[_inputPtr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_HYPHEN: // '-->'?
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                if (_inputBuffer[_inputPtr] == '-') { // ok, must be end then
                    ++_inputPtr;
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (_inputBuffer[_inputPtr++] != '>') {
                        reportDoubleHyphenInComments();
                    }
                    return;
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                /*char d =*/ checkSurrogate(c);
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        // skipping, no need to output
    }
}
/**
 * Skips the data of a processing instruction, up to and including the
 * terminating "?&gt;" marker. Line feeds are still tracked, surrogates
 * are verified with {@code checkSurrogate()}, and characters that the
 * character type table flags as invalid are passed to
 * {@code handleInvalidXmlChar()}, the same way the sibling methods
 * using this table (comment and CDATA skipping) do.
 *
 * @throws XMLStreamException propagated from the loading and
 *   character-validation helpers
 */
@Override
protected final void skipPI() throws XMLStreamException
{
    final int[] TYPES = sCharTypes.OTHER_CHARS;
    final char[] inputBuffer = _inputBuffer;

    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            int max = _inputEnd;
            if (ptr >= max) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
                max = _inputEnd;
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
            }
            _inputPtr = ptr;
        }
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                // Skipped content must not hide illegal characters
                c = handleInvalidXmlChar(c);
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (inputBuffer[_inputPtr] == CHAR_LF) {
                        ++_inputPtr;
                    }
                    markLF();
                }
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_QMARK: // '?>'?
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                if (_inputBuffer[_inputPtr] == '>') {
                    ++_inputPtr;
                    return;
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                /*char d =*/ checkSurrogate(c);
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        // skipping, no need to output
    }
}
/**
 * Skips a character data segment, stopping before the next '&lt;' or
 * at an unexpandable general entity. Line feeds are still tracked and
 * content is still validated (invalid characters, surrogates, entity
 * references and "]]&gt;" in content).
 *
 * @return True if skipping stopped because an unexpandable general
 *   entity was encountered; false if it stopped at a '&lt;'
 *
 * @throws XMLStreamException propagated from the loading, entity and
 *   character-validation helpers
 */
@Override
protected final boolean skipCharacters() throws XMLStreamException
{
    final int[] TYPES = sCharTypes.TEXT_CHARS;
    final char[] inputBuffer = _inputBuffer;

    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            int max = _inputEnd;
            if (ptr >= max) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
                max = _inputEnd;
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
            }
            _inputPtr = ptr;
        }
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would skip
                // a following LF and count a line that is not there
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    if (inputBuffer[_inputPtr] == CHAR_LF) {
                        ++_inputPtr;
                    }
                    markLF();
                }
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_LT:
                // Leave the '<' for the next event
                --_inputPtr;
                return false;
            case XmlCharTypes.CT_AMP:
                {
                    int d = handleEntityInText(false);
                    if (d == 0) { // unexpandable general parsed entity
                        return true;
                    }
                }
                break;
            case XmlCharTypes.CT_RBRACKET: // ']]>'?
                {
                    // Let's then just count number of brackets --
                    // in case they are not followed by '>'
                    int count = 1;
                    while (true) {
                        if (_inputPtr >= _inputEnd) {
                            loadMoreGuaranteed();
                        }
                        c = inputBuffer[_inputPtr];
                        if (c != ']') {
                            break;
                        }
                        ++_inputPtr; // to skip past bracket
                        ++count;
                    }
                    if (c == '>' && count > 1) {
                        reportIllegalCDataEnd();
                    }
                }
                // Brackets are simply dropped when skipping
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                /*char d =*/ checkSurrogate(c);
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
    }
}
/**
 * Skips the contents of a CDATA section, up to and including the
 * closing "]]&gt;" marker. Line feeds are still tracked, invalid
 * characters are passed to {@code handleInvalidXmlChar()}, and
 * surrogates are verified with {@code checkSurrogate()}.
 *
 * @throws XMLStreamException propagated from the loading and
 *   character-validation helpers
 */
@Override
protected final void skipCData() throws XMLStreamException
{
    final int[] TYPES = sCharTypes.OTHER_CHARS;
    final char[] inputBuffer = _inputBuffer;

    while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop:
        while (true) {
            int ptr = _inputPtr;
            int max = _inputEnd;
            if (ptr >= max) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
                max = _inputEnd;
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
            }
            _inputPtr = ptr;
        }
        if (c <= 0xFF) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // Must not fall through to CR handling: that would skip
                // a following LF and count a line that is not there
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    int ptr = _inputPtr;
                    if (ptr >= _inputEnd) {
                        loadMoreGuaranteed();
                        ptr = _inputPtr;
                    }
                    if (inputBuffer[ptr] == CHAR_LF) {
                        ++ptr;
                        ++_inputPtr;
                    }
                    markLF(ptr);
                }
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_RBRACKET: // ']]>'?
                {
                    // end is nigh?
                    // count includes the bracket that got us here
                    int count = 0;
                    do {
                        if (_inputPtr >= _inputEnd) {
                            loadMoreGuaranteed();
                        }
                        ++count;
                        c = _inputBuffer[_inputPtr++];
                    } while (c == ']');

                    if (c == '>') {
                        if (count > 1) { // gotcha
                            return;
                        }
                        // can still skip plain ']>'...
                    } else {
                        --_inputPtr; // need to push back last char
                    }
                }
                break;
            // default:
                // Other types are not important here...
            }
        } else if (c >= 0xD800) { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                /*char d =*/ checkSurrogate(c);
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
    }
}
/**
 * Skips over a run of white space, keeping track of line feeds.
 * Stops at the first character above space (left unconsumed) or when
 * no more input can be loaded; any character at or below space other
 * than space, tab, CR and LF is passed to {@code throwInvalidSpace()}.
 */
@Override
protected final void skipSpace() throws XMLStreamException
{
    // mTmpChar has a space, but it's been checked, can ignore
    int pos = _inputPtr;

    scan:
    for (;;) {
        if (pos >= _inputEnd) {
            if (!loadMore()) {
                break;
            }
            pos = _inputPtr;
        }
        final char ch = _inputBuffer[pos];
        if (ch > ' ') { // !!! TODO: xml 1.1 ws
            break;
        }
        ++pos;
        switch (ch) {
        case '\n':
            markLF(pos);
            break;
        case '\r':
            if (pos >= _inputEnd) {
                if (!loadMore()) {
                    break scan;
                }
                pos = _inputPtr;
            }
            // "\r\n" counts as a single line feed
            if (_inputBuffer[pos] == '\n') {
                ++pos;
            }
            markLF(pos);
            break;
        case ' ':
        case '\t':
            break;
        default:
            // sync the field first, before reporting the problem
            _inputPtr = pos;
            throwInvalidSpace(ch);
        }
    }
    _inputPtr = pos;
}
/*
/**********************************************************************
/* Entity/name handling
/**********************************************************************
*/
Returns: First character following skipped white space
/**
 * Skips white space within markup, keeping track of line feeds.
 *
 * @param reqd Whether at least one white space character is required;
 *   if so and none is found, the character is reported via
 *   {@code reportTreeUnexpChar()}
 * @param msg Description appended to the error message when required
 *   white space is missing
 *
 * @return First character following skipped white space
 */
protected char skipInternalWs(boolean reqd, String msg)
    throws XMLStreamException
{
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char ch = _inputBuffer[_inputPtr++];
    if (ch > INT_SPACE) {
        if (!reqd) {
            return ch;
        }
        reportTreeUnexpChar(ch, " (expected white space "+msg+")");
    }

    while (true) {
        // Handle the character we have, before fetching the next one
        switch (ch) {
        case '\n':
            markLF();
            break;
        case '\r':
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            // "\r\n" counts as a single line feed
            if (_inputBuffer[_inputPtr] == '\n') {
                ++_inputPtr;
            }
            markLF();
            break;
        case ' ':
        case '\t':
            break;
        default:
            throwInvalidSpace(ch);
        }
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        ch = _inputBuffer[_inputPtr++];
        if (ch > INT_SPACE) {
            return ch;
        }
    }
}
/**
 * Verifies that the input continues with the remainder of the given
 * keyword. Matching starts from the second character of the keyword
 * (index 1); matched characters are consumed, and the first mismatch
 * is reported via {@code reportTreeUnexpChar()}.
 *
 * @param keyw Full keyword being matched
 */
private final void matchAsciiKeyword(String keyw)
    throws XMLStreamException
{
    final int len = keyw.length();
    int i = 1;
    while (i < len) {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        final char actual = _inputBuffer[_inputPtr++];
        final char expected = keyw.charAt(i);
        if (actual != expected) {
            reportTreeUnexpChar(actual, " (expected '"+expected+"' for "+keyw+" keyword)");
        }
        ++i;
    }
}
Note: consecutive white space is only considered indentation,
if the following token seems like a tag (start/end). This so
that if a CDATA section follows, it can be coalesced in
coalescing mode. Although we could check if coalescing mode is
enabled, this should seldom have significant effect either way,
so it removes one possible source of problems in coalescing mode.
Returns: -1, if indentation was handled; offset in the output
buffer, if not
/**
 * Checks whether a text segment that starts with a line feed consists
 * of just that line feed plus a run of identical indentation
 * characters (spaces or tabs) followed by a tag; if so, the text
 * builder is set up with the indentation directly. Otherwise the
 * characters consumed so far (line feed plus any indentation chars)
 * are copied into a freshly reset text buffer.
 *<p>
 * Note: consecutive white space is only considered indentation,
 * if the following token seems like a tag (start/end). This so
 * that if a CDATA section follows, it can be coalesced in
 * coalescing mode. Although we could check if coalescing mode is
 * enabled, this should seldom have significant effect either way,
 * so it removes one possible source of problems in coalescing mode.
 *
 * @param c Line feed character ('\r' or '\n') that starts the segment
 *
 * @return -1, if indentation was handled; offset in the output
 *   buffer, if not
 */
protected final int checkInTreeIndentation(char c)
throws XMLStreamException
{
if (c == '\r') {
// First a degenerate case, a lone \r:
if (_inputPtr >= _inputEnd && !loadMore()) {
_textBuilder.resetWithIndentation(0, CHAR_SPACE);
return -1;
}
// "\r\n" counts as a single line feed
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
}
markLF();
// Then need an indentation char (or start/end tag):
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
c = _inputBuffer[_inputPtr];
if (c != ' ' && c != '\t') {
// May still be indentation, if it's lt + non-exclamation mark
if (c == '<') {
// only peeks at what is already buffered; no extra load for this
if ((_inputPtr+1) < _inputEnd && _inputBuffer[_inputPtr+1] != '!') {
_textBuilder.resetWithIndentation(0, ' ');
return -1;
}
}
// Not indentation: start regular text with just the line feed
char[] outputBuffer = _textBuilder.resetWithEmpty();
outputBuffer[0] = '\n';
_textBuilder.setCurrentLength(1);
return 1;
}
// So how many do we get?
++_inputPtr;
int count = 1;
int max = (c == ' ') ? TextBuilder.MAX_INDENT_SPACES : TextBuilder.MAX_INDENT_TABS;
while (count <= max) {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
char c2 = _inputBuffer[_inputPtr];
if (c2 != c) {
// Has to be followed by a start/end tag...
if (c2 == '<' && (_inputPtr+1) < _inputEnd
&& _inputBuffer[_inputPtr+1] != '!') {
_textBuilder.resetWithIndentation(count, c);
return -1;
}
break;
}
++_inputPtr;
++count;
}
// Nope, hit something else, or too long: need to just copy the stuff
// we know buffer has enough room either way
char[] outputBuffer = _textBuilder.resetWithEmpty();
outputBuffer[0] = '\n';
for (int i = 1; i <= count; ++i) {
outputBuffer[i] = c;
}
count += 1; // to account for leading lf
_textBuilder.setCurrentLength(count);
return count;
}
Returns: -1, if indentation was handled; offset in the output
buffer, if not
/**
 * Prolog/epilog variant of the indentation check: handles the white
 * space following a just-read line end, ending the run at end of input,
 * at the first different character, or once the sharable maximum is reached.
 *
 * @param c Line-end character the caller has already read; for a
 *   <code>'\r'</code> an immediately following <code>'\n'</code> is consumed too
 *
 * @return -1, if indentation was handled; offset in the output
 *   buffer, if not
 */
protected final int checkPrologIndentation(char c)
    throws XMLStreamException
{
    if (c == '\r') {
        // Degenerate case first: a lone \r right at the end of input
        if (_inputPtr >= _inputEnd && !loadMore()) {
            _textBuilder.resetWithIndentation(0, CHAR_SPACE);
            return -1;
        }
        // \r\n counts as a single line end
        if (_inputBuffer[_inputPtr] == '\n') {
            ++_inputPtr;
        }
    }
    markLF();

    // End of input right after the line end: empty indentation
    if (_inputPtr >= _inputEnd && !loadMore()) {
        _textBuilder.resetWithIndentation(0, CHAR_SPACE);
        return -1;
    }

    // Peek only; the char is not consumed until we know it is indentation
    final char indentChar = _inputBuffer[_inputPtr];
    final boolean usesSpaces = (indentChar == ' ');

    if (!usesSpaces && indentChar != '\t') {
        if (indentChar == '<') {
            // Markup directly after the line end: still (empty) indentation
            _textBuilder.resetWithIndentation(0, CHAR_SPACE);
            return -1;
        }
        // Something else: hand back a buffer holding just the linefeed
        char[] buf = _textBuilder.resetWithEmpty();
        buf[0] = '\n';
        _textBuilder.setCurrentLength(1);
        return 1;
    }

    // Count the run of identical indentation chars (first one is known)
    ++_inputPtr;
    final int limit = usesSpaces ? TextBuilder.MAX_INDENT_SPACES : TextBuilder.MAX_INDENT_TABS;
    int indentCount = 1;

    while ((_inputPtr < _inputEnd || loadMore())
            && _inputBuffer[_inputPtr] == indentChar) {
        ++_inputPtr;
        ++indentCount;
        if (indentCount >= limit) {
            // Too long to share, but can still be built explicitly;
            // buffer is known to have enough room
            char[] buf = _textBuilder.resetWithEmpty();
            buf[0] = '\n';
            final int total = indentCount + 1; // +1 for the leading lf
            for (int i = 1; i < total; ++i) {
                buf[i] = indentChar;
            }
            _textBuilder.setCurrentLength(total);
            return total;
        }
    }

    // Run ended within limits: use the shared representation
    _textBuilder.resetWithIndentation(indentCount, indentChar);
    return -1;
}
/**
 * Parses a name whose first character has already been read, and
 * resolves it to a symbol: an existing one if the symbol table has it,
 * otherwise one created (and fully validated) by {@link #addPName}.
 * The character that ends the name is left unconsumed.
 *
 * @param c First character of the name
 *
 * @return Symbol matching the parsed name
 */
protected PName parsePName(char c)
    throws XMLStreamException
{
    char[] nameBuffer = _nameBuffer;

    // Only a cheap sanity check here (catches extra spaces etc.); the
    // thorough validation is done later on, if the name is a new one.
    // Lowest acceptable start char, except for ':' that would be
    // allowed in non-ns mode.
    if (c < INT_A) {
        throwUnexpectedChar(c, "; expected a name start character");
    }
    nameBuffer[0] = c;
    int hash = (int) c;
    int nameLen = 1;

    for (;;) {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        final char next = _inputBuffer[_inputPtr];
        // Below 'A' only "-" (45), "." (46), "0"-"9" (48-57) and ":" (58)
        // can continue a name; anything else (including "/", 47) ends it
        final boolean endsName = (next < 'A')
                && (next < '-' || next > ':' || next == '/');
        if (endsName) {
            PName name = _symbols.findSymbol(nameBuffer, 0, nameLen, hash);
            return (name != null) ? name : addPName(nameBuffer, nameLen, hash);
        }
        ++_inputPtr;
        if (nameLen >= nameBuffer.length) {
            _nameBuffer = nameBuffer = DataUtil.growArrayBy(nameBuffer, nameBuffer.length);
        }
        nameBuffer[nameLen++] = next;
        hash = (hash * 31) + next;
    }
}
/**
 * Fully validates a name that was not found in the symbol table and
 * then adds it there.
 *<p>
 * The first character must be a valid XML 1.0 name start character,
 * the rest valid name characters, and at most one colon is allowed.
 * Surrogate pairs are checked via <code>checkSurrogateNameChar</code>.
 *
 * @param nameBuffer Buffer that contains the name, starting at offset 0
 * @param nameLen Length of the name, in chars
 * @param hash Hash code calculated for the name
 *
 * @return Newly added symbol
 */
protected final PName addPName(char[] nameBuffer, int nameLen, int hash)
    throws XMLStreamException
{
    char c = nameBuffer[0];
    int namePtr = 1;
    int last_colon = -1; // where the colon is

    if (c < 0xD800 || c >= 0xE000) {
        if (!XmlChars.is10NameStartChar(c)) {
            reportInvalidNameChar(c, 0);
        }
    } else {
        if (nameLen == 1) { // unpaired surrogate
            reportInvalidFirstSurrogate(c);
        }
        // Only returns if ok; throws exception otherwise
        checkSurrogateNameChar(c, nameBuffer[1], 0);
        ++namePtr; // second half of the pair is handled
    }

    for (; namePtr < nameLen; ++namePtr) {
        c = nameBuffer[namePtr];
        if (c < 0xD800 || c >= 0xE000) {
            if (c == ':') {
                if (last_colon >= 0) {
                    reportMultipleColonsInName();
                }
                last_colon = namePtr;
            } else {
                if (!XmlChars.is10NameChar(c)) {
                    reportInvalidNameChar(c, namePtr);
                }
            }
        } else {
            if ((namePtr+1) >= nameLen) { // unpaired surrogate
                reportInvalidFirstSurrogate(c);
            }
            checkSurrogateNameChar(c, nameBuffer[namePtr+1], namePtr);
            // Skip the low surrogate as well, just like for the first
            // character; otherwise it would be re-checked as if it
            // started a new pair
            ++namePtr;
        }
    }
    return _symbols.addSymbol(nameBuffer, 0, nameLen, hash);
}
/**
 * Parses a public identifier up to (and including) the closing quote,
 * normalizing white space as it goes: each run of white space between
 * other characters becomes a single space, and leading and trailing
 * white space is dropped.
 *
 * @param quoteChar Quote character that ends the identifier
 *
 * @return Normalized public identifier
 */
protected String parsePublicId(char quoteChar)
    throws XMLStreamException
{
    char[] outputBuffer = _nameBuffer;
    int outPtr = 0;
    final int[] TYPES = XmlCharTypes.PUBID_CHARS;
    boolean addSpace = false;

    while (true) {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        // Easier to check without char type table, first:
        char c = _inputBuffer[_inputPtr++];
        if (c == quoteChar) {
            break;
        }
        if ((c > 0xFF) || TYPES[c] != XmlCharTypes.PUBID_OK) {
            throwUnexpectedChar(c, " in public identifier");
        }
        // White space? Needs to be coalesced
        if (c <= INT_SPACE) {
            addSpace = true;
            continue;
        }
        if (addSpace) {
            addSpace = false;
            // Leading white space is dropped, not turned into a space
            if (outPtr > 0) {
                if (outPtr >= outputBuffer.length) {
                    // Grown array retains existing contents, so the
                    // output pointer must NOT be reset
                    _nameBuffer = outputBuffer = DataUtil.growArrayBy(outputBuffer, outputBuffer.length);
                }
                outputBuffer[outPtr++] = ' ';
            }
        }
        if (outPtr >= outputBuffer.length) {
            _nameBuffer = outputBuffer = DataUtil.growArrayBy(outputBuffer, outputBuffer.length);
        }
        outputBuffer[outPtr++] = c;
    }
    return new String(outputBuffer, 0, outPtr);
}
/**
 * Parses a system identifier up to (and including) the closing quote.
 * Line ends (<code>\r</code>, <code>\r\n</code>) are normalized to
 * <code>\n</code> and row tracking is updated for them; other
 * characters are copied as is.
 *
 * @param quoteChar Quote character that ends the identifier
 *
 * @return Contents of the system identifier
 */
protected String parseSystemId(char quoteChar)
    throws XMLStreamException
{
    char[] outputBuffer = _nameBuffer;
    int outPtr = 0;
    // attribute types are closest matches, so let's use them
    final int[] TYPES = sCharTypes.ATTR_CHARS;

    main_loop:
    while (true) {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        char c = _inputBuffer[_inputPtr++];
        // NOTE(review): the type table comes from the Latin-1 char types,
        // so presumably it only covers 0x00-0xFF; chars beyond that need
        // no special handling here and must not be used as an index
        if (c <= 0xFF && TYPES[c] != 0) {
            switch (TYPES[c]) {
            case XmlCharTypes.CT_INVALID:
                c = handleInvalidXmlChar(c);
                // must not fall through to linefeed handling: if the
                // call returns a replacement char, it is used as is
                break;
            case XmlCharTypes.CT_WS_CR:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
                    }
                    // \r\n counts as a single line end
                    if (_inputBuffer[_inputPtr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                }
                c = '\n';
                break;
            case XmlCharTypes.CT_WS_LF:
                markLF();
                break;
            case XmlCharTypes.CT_ATTR_QUOTE:
                if (c == quoteChar) {
                    break main_loop;
                }
            }
        }
        if (outPtr >= outputBuffer.length) {
            // Grown array retains existing contents, so the output
            // pointer must NOT be reset
            _nameBuffer = outputBuffer = DataUtil.growArrayBy(outputBuffer, outputBuffer.length);
        }
        outputBuffer[outPtr++] = c;
    }
    return new String(outputBuffer, 0, outPtr);
}
/*
/**********************************************************************
/* Other parsing helper methods
/**********************************************************************
*/
This method is called to verify that a surrogate
pair found describes a legal surrogate pair (ie. expands
to a legal XML char)
/**
 * This method is called to verify that a surrogate
 * pair found describes a legal surrogate pair (i.e. expands
 * to a legal XML char). The second half of the pair is read
 * from the input buffer and consumed.
 *
 * @param firstChar First (high) surrogate of the pair, already read
 *
 * @return Second (low) surrogate of the pair
 */
private char checkSurrogate(char firstChar)
    throws XMLStreamException
{
    // Low surrogates (0xDC00 and above) can not start a pair
    if (firstChar >= 0xDC00) {
        reportInvalidFirstSurrogate(firstChar);
    }
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char sec = _inputBuffer[_inputPtr++];
    if (sec < 0xDC00 || sec >= 0xE000) {
        reportInvalidSecondSurrogate(sec);
    }
    // And the composite, is it ok? Both halves contribute to the
    // code point, so both need to be included
    int val = Character.toCodePoint(firstChar, sec);
    if (val > XmlConsts.MAX_UNICODE_CHAR) {
        reportInvalidXmlChar(val);
    }
    return sec;
}
/**
 * Verifies that given two chars form a legal surrogate pair, and that
 * the character they encode is acceptable within a name. Currently
 * no such character is accepted, so a valid pair is always reported
 * as an invalid name character.
 *
 * @param firstChar First (high) surrogate of the pair
 * @param sec Second (low) surrogate of the pair
 * @param index Offset of the first surrogate within the name; used
 *   for error reporting
 *
 * @return Code point encoded by the pair
 */
private int checkSurrogateNameChar(char firstChar, char sec, int index)
    throws XMLStreamException
{
    // Low surrogates (0xDC00 and above) can not start a pair
    if (firstChar >= 0xDC00) {
        reportInvalidFirstSurrogate(firstChar);
    }
    if (sec < 0xDC00 || sec >= 0xE000) {
        reportInvalidSecondSurrogate(sec);
    }
    // And the composite, is it ok? Both halves contribute to the code
    // point; leaving out the second would report the wrong character
    int val = Character.toCodePoint(firstChar, sec);
    if (val > XmlConsts.MAX_UNICODE_CHAR) {
        reportInvalidXmlChar(val);
    }
    // !!! TODO: xml 1.1 vs 1.0 rules: none valid for 1.0, many for 1.1
    reportInvalidNameChar(val, index);
    return val;
}
This method is similar to checkSurrogate, but
returns the actual character code encoded by the surrogate
pair. This is needed if further validation rules (such as name
character checks) are to be done.
/**
 * This method is similar to <code>checkSurrogate</code>, but
 * returns the actual character code encoded by the surrogate
 * pair. This is needed if further validation rules (such as name
 * character checks) are to be done. The second half of the pair is
 * read from the input buffer and consumed.
 *
 * @param firstChar First (high) surrogate of the pair, already read
 *
 * @return Code point encoded by the surrogate pair
 */
private int decodeSurrogate(char firstChar)
    throws XMLStreamException
{
    // Low surrogates (0xDC00 and above) can not start a pair
    if (firstChar >= 0xDC00) {
        reportInvalidFirstSurrogate(firstChar);
    }
    if (_inputPtr >= _inputEnd) {
        loadMoreGuaranteed();
    }
    char sec = _inputBuffer[_inputPtr++];
    if (sec < 0xDC00 || sec >= 0xE000) {
        reportInvalidSecondSurrogate(sec);
    }
    // Combine both halves: the low 10 bits come from the second
    // surrogate, and must be included to get the actual code point
    int val = Character.toCodePoint(firstChar, sec);
    if (val > XmlConsts.MAX_UNICODE_CHAR) {
        reportInvalidXmlChar(val);
    }
    return val;
}
/**
 * Reports an input problem for a surrogate character that can not
 * be the first half of a surrogate pair.
 *
 * @param ch Offending character
 */
private void reportInvalidFirstSurrogate(char ch)
    throws XMLStreamException
{
    final int code = ch;
    final StringBuilder msg = new StringBuilder(80);
    msg.append("Invalid surrogate character (code 0x")
        .append(Integer.toHexString(code))
        .append("): can not start a surrogate pair");
    reportInputProblem(msg.toString());
}
/**
 * Reports an input problem for a character that is not legal as the
 * second half of a surrogate pair.
 *
 * @param ch Offending character
 */
private void reportInvalidSecondSurrogate(char ch)
    throws XMLStreamException
{
    // Code is printed in hex, so it needs the "0x" prefix (as in the
    // message for invalid first surrogate) to not be read as decimal
    reportInputProblem("Invalid surrogate character (code 0x"+Integer.toHexString((int) ch)+"): is not legal as the second part of a surrogate pair");
}
/*
/**********************************************************************
/* Location handling
/**********************************************************************
*/
/**
 * Builds a location object for the current input position: past
 * chars plus the buffer offset, the current row, and the column
 * relative to the start of the current row.
 */
@Override
public XMLStreamLocation2 getCurrentLocation()
{
    final String publicId = _config.getPublicId();
    final String systemId = _config.getSystemId();
    final int column = _inputPtr - _rowStartOffset;

    return LocationImpl.fromZeroBased(publicId, systemId,
            _pastBytesOrChars + _inputPtr, _currRow, column);
}
/**
 * @return Column of the current input position, calculated as the
 *   distance from the start of the current row
 */
@Override
public int getCurrentColumnNr()
{
    final int column = _inputPtr - _rowStartOffset;
    return column;
}
/**
 * @return Always -1: byte offsets are not available for this
 *   scanner type
 */
@Override
public long getStartingByteOffset()
{
    final long notAvailable = -1L;
    return notAvailable;
}
/**
 * @return Raw start offset, as recorded by {@link #setStartLocation}
 */
@Override
public long getStartingCharOffset()
{
    final long startOffset = _startRawOffset;
    return startOffset;
}
/**
 * @return Always -1: byte offsets are not available for this
 *   scanner type
 */
@Override
public long getEndingByteOffset() throws XMLStreamException
{
    final long notAvailable = -1L;
    return notAvailable;
}
/**
 * Returns the char offset of the current input position; if the
 * current token is still incomplete, it is finished first.
 *
 * @return Number of past chars plus the current buffer offset
 */
@Override
public long getEndingCharOffset() throws XMLStreamException
{
    // End location is only known once the whole token has been read
    if (_tokenIncomplete) {
        finishToken();
    }
    final long endOffset = _pastBytesOrChars + _inputPtr;
    return endOffset;
}
/**
 * Records a line end: advances the row counter and marks the given
 * buffer offset as the start of the new row.
 *
 * @param offset Offset within the input buffer where the new row starts
 */
protected final void markLF(int offset)
{
    ++_currRow;
    _rowStartOffset = offset;
}
/**
 * Records a line end, with the new row starting at the current
 * input pointer.
 */
protected final void markLF()
{
    markLF(_inputPtr);
}
/**
 * Records the current input position (row, column and raw offset)
 * as the start location.
 */
protected final void setStartLocation()
{
    _startRow = _currRow;
    _startColumn = _inputPtr - _rowStartOffset;
    _startRawOffset = _pastBytesOrChars + _inputPtr;
}
/*
/**********************************************************************
/* Input loading
/**********************************************************************
*/
/**
 * Replaces the contents of the input buffer with the next chunk from
 * the underlying Reader, updating offsets so that locations remain
 * correct.
 *
 * @return True if at least one char was read; false if there is no
 *   Reader, or the Reader had nothing more to return
 */
@Override
protected final boolean loadMore() throws XMLStreamException
{
    // Block source: no Reader, and hence no more data either
    if (_in == null) {
        _inputEnd = 0;
        return false;
    }

    // Everything in the buffer is now "past"; row start becomes
    // relative to the start of the new buffer contents
    _pastBytesOrChars += _inputEnd;
    _rowStartOffset -= _inputEnd;
    _inputPtr = 0;

    final int count;
    try {
        count = _in.read(_inputBuffer, 0, _inputBuffer.length);
    } catch (IOException ioe) {
        throw new IoStreamException(ioe);
    }

    if (count >= 1) {
        _inputEnd = count;
        return true;
    }
    _inputEnd = 0;
    if (count == 0) {
        // Sanity check; should never happen with correctly written Readers
        reportInputProblem("Reader returned 0 bytes, even when asked to read up to "+_inputBuffer.length);
    }
    return false;
}
/**
 * Loads more input and returns (and consumes) its first character;
 * end of input is reported as a problem that refers to the current
 * token type.
 *
 * @return First character of the newly loaded input
 */
protected final char loadOne() throws XMLStreamException
{
    return loadOne(_currToken);
}
/**
 * Loads more input and returns (and consumes) its first character;
 * end of input is reported as a problem that refers to the given
 * token type.
 *
 * @param type Token type to use for the error description
 *
 * @return First character of the newly loaded input
 */
protected final char loadOne(int type)
    throws XMLStreamException
{
    final boolean gotMore = loadMore();
    if (!gotMore) {
        reportInputProblem("Unexpected end-of-input when trying to parse "+ErrorConsts.tokenTypeDesc(type));
    }
    final char first = _inputBuffer[_inputPtr];
    ++_inputPtr;
    return first;
}
/**
 * Loads more input while retaining the not-yet-consumed part of the
 * buffer: remaining content is moved to the beginning of the buffer
 * and new content is read after it, until the buffer holds at least
 * the requested number of chars.
 *
 * @param nrOfChars Minimum number of chars the buffer should contain
 *
 * @return True if the buffer now has at least <code>nrOfChars</code>
 *   chars; false if there is no Reader, or it ran out of content first
 */
protected final boolean loadAndRetain(int nrOfChars)
    throws XMLStreamException
{
    // Can't move anything if we were handed an immutable block
    // (alternative to handing a Reader as _in)
    if (_in == null) {
        return false;
    }

    // Same offset bookkeeping as loadMore(), but only for the consumed part
    final int consumed = _inputPtr;
    _pastBytesOrChars += consumed;
    _rowStartOffset -= consumed;

    final int retained = _inputEnd - consumed; // must be > 0
    System.arraycopy(_inputBuffer, consumed, _inputBuffer, 0, retained);
    _inputPtr = 0;
    _inputEnd = retained; // for now covers just the copied part

    try {
        while (true) {
            final int room = _inputBuffer.length - _inputEnd;
            final int count = _in.read(_inputBuffer, _inputEnd, room);
            if (count < 1) {
                if (count == 0) {
                    // Sanity check, should never happen with non-buggy readers/stream
                    reportInputProblem("Reader returned 0 bytes, even when asked to read up to "+room);
                }
                return false;
            }
            _inputEnd += count;
            if (_inputEnd >= nrOfChars) {
                return true;
            }
        }
    } catch (IOException ioe) {
        throw new IoStreamException(ioe);
    }
}
}