/* Aalto XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import java.io.*;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.WFCException;
import com.fasterxml.aalto.util.XmlConsts;

Abstract base class that defines shared functionality between different bootstrappers (byte stream, char Readers, block input)
/** * Abstract base class that defines shared functionality between different * bootstrappers (byte stream, char Readers, block input) */
public abstract class InputBootstrapper implements XmlConsts { /* /********************************************************************** /* Shared string consts /********************************************************************** */ public final static String ERR_XMLDECL_KW_VERSION = "; expected keyword '"+XmlConsts.XML_DECL_KW_VERSION+"'"; public final static String ERR_XMLDECL_KW_ENCODING = "; expected keyword '"+XmlConsts.XML_DECL_KW_ENCODING+"'"; public final static String ERR_XMLDECL_KW_STANDALONE = "; expected keyword '"+XmlConsts.XML_DECL_KW_STANDALONE+"'"; public final static String ERR_XMLDECL_END_MARKER = "; expected \"?>\" end marker"; public final static String ERR_XMLDECL_EXP_SPACE = "; expected a white space"; public final static String ERR_XMLDECL_EXP_EQ = "; expected '=' after "; public final static String ERR_XMLDECL_EXP_ATTRVAL = "; expected a quote character enclosing value for "; /* /********************************************************************** /* Input location data /********************************************************************** */
Current number of input units (bytes or chars) that were processed in previous blocks, before contents of current input buffer.

Note: includes possible BOMs, if those were part of the input.

/** * Current number of input units (bytes or chars) that were processed in * previous blocks, * before contents of current input buffer. *<p> * Note: includes possible BOMs, if those were part of the input. */
protected int _inputProcessed = 0;
Current row location of current point in input buffer, using zero-based counting.
/** * Current row location of current point in input buffer, using * zero-based counting. */
protected int _inputRow = 0;
Current index of the first character of the current row in input buffer. Needed to calculate column position, if necessary; benefit of not having column itself is that this only has to be updated once per line.
/** * Current index of the first character of the current row in input * buffer. Needed to calculate column position, if necessary; benefit * of not having column itself is that this only has to be updated * once per line. */
protected int _inputRowStart = 0; /* /********************************************************************** /* Info passed by the caller /********************************************************************** */ final ReaderConfig _config; /* /********************************************************************** /* Info from XML declaration /********************************************************************** */ //boolean mHadDeclaration = false;
XML declaration from the input (1.0, 1.1 or 'unknown')
/** * XML declaration from the input (1.0, 1.1 or 'unknown') */
int mDeclaredXmlVersion = XmlConsts.XML_V_UNKNOWN;
Value of encoding pseudo-attribute from xml declaration, if one was found; null otherwise.
/** * Value of encoding pseudo-attribute from xml declaration, if * one was found; null otherwise. */
String mFoundEncoding; String mStandalone; /* /********************************************************************** //* Temporary data /********************************************************************** */
Need a short buffer to read in values of pseudo-attributes (version, encoding, standalone). Don't really need tons of space; just enough for the longest anticipated encoding id... and maybe few chars just in case (for additional white space that we ignore)
/** * Need a short buffer to read in values of pseudo-attributes (version, * encoding, standalone). Don't really need tons of space; just enough * for the longest anticipated encoding id... and maybe few chars just * in case (for additional white space that we ignore) */
final char[] mKeyword; /* /********************************************************************** /* Life-cycle /********************************************************************** */ protected InputBootstrapper(ReaderConfig cfg) { _config = cfg; mKeyword = cfg.allocSmallCBuffer(ReaderConfig.DEFAULT_SMALL_BUFFER_LEN); }
Main bootstrapping method, which will try to open the underlying input source, check its encoding, read xml declaration if there is one, and finally create a scanner for actual parsing.
/** * Main bootstrapping method, which will try to open the underlying * input source, check its encoding, read xml declaration if * there is one, and finally create a scanner for actual parsing. */
public abstract XmlScanner bootstrap() throws XMLStreamException; /* /********************************************************************** /* Package methods, parsing /********************************************************************** */
Method that will parse xml declaration, which at this point is known to exist.
/** * Method that will parse xml declaration, which at this point is * known to exist. */
protected void readXmlDeclaration() throws IOException, XMLStreamException { int c = getNextAfterWs(false); // First, version pseudo-attribute: if (c != 'v') { reportUnexpectedChar(c, ERR_XMLDECL_KW_VERSION); } else { // ok, should be version mDeclaredXmlVersion = readXmlVersion(); c = getWsOrChar('?'); } // Then, 'encoding' if (c == 'e') { mFoundEncoding = readXmlEncoding(); c = getWsOrChar('?'); } // Then, 'standalone' (for main doc) if (c == 's') { mStandalone = readXmlStandalone(); c = getWsOrChar('?'); } // And finally, need to have closing markers if (c != '?') { reportUnexpectedChar(c, ERR_XMLDECL_END_MARKER); } c = getNext(); if (c != '>') { reportUnexpectedChar(c, ERR_XMLDECL_END_MARKER); } }
Returns:Xml version declaration read
/** * @return Xml version declaration read */
private final int readXmlVersion() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_VERSION); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_VERSION); } c = handleEq(XmlConsts.XML_DECL_KW_VERSION); int len = readQuotedValue(mKeyword, c); if (len == 3) { if (mKeyword[0] == '1' && mKeyword[1] == '.') { c = mKeyword[2]; if (c == '0') { return XmlConsts.XML_V_10; } if (c == '1') { return XmlConsts.XML_V_11; } } } // Nope; error. -1 indicates run off... String got; if (len < 0) { got = "'"+new String(mKeyword)+"[..]'"; } else if (len == 0) { got = "<empty>"; } else { got = "'"+new String(mKeyword, 0, len)+"'"; } reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_VERSION, got, XmlConsts.XML_V_10_STR, XmlConsts.XML_V_11_STR); return XmlConsts.XML_V_UNKNOWN; // never gets here, but compiler needs it } private final String readXmlEncoding() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_ENCODING); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_ENCODING); } c = handleEq(XmlConsts.XML_DECL_KW_ENCODING); int len = readQuotedValue(mKeyword, c); /* Hmmh. How about "too long" encodings? Maybe just truncate them, * for now? */ if (len == 0) { // let's still detect missing value... reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_ENCODING, null, null, null); } if (len < 0) { // will be truncated... return new String(mKeyword); } return new String(mKeyword, 0, len); } private final String readXmlStandalone() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_STANDALONE); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_STANDALONE); } c = handleEq(XmlConsts.XML_DECL_KW_STANDALONE); int len = readQuotedValue(mKeyword, c); if (len == 2) { if (mKeyword[0] == 'n' && mKeyword[1] == 'o') { return XmlConsts.XML_SA_NO; } } else if (len == 3) { if (mKeyword[0] == 'y' && mKeyword[1] == 'e' && mKeyword[2] == 's') { return XmlConsts.XML_SA_YES; } } // Nope; error. -1 indicates run off... String got; if (len < 0) { got = "'"+new String(mKeyword)+"[..]'"; } else if (len == 0) { got = "<empty>"; } else { got = "'"+new String(mKeyword, 0, len)+"'"; } reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_STANDALONE, got, XmlConsts.XML_SA_YES, XmlConsts.XML_SA_NO); return got; // never gets here, but compiler can't figure it out } private final int handleEq(String attr) throws IOException, XMLStreamException { int c = getNextAfterWs(false); if (c != '=') { reportUnexpectedChar(c, ERR_XMLDECL_EXP_EQ+"'"+attr+"'"); } c = getNextAfterWs(false); if (c != '"' && c != '\'') { reportUnexpectedChar(c, ERR_XMLDECL_EXP_ATTRVAL+"'"+attr+"'"); } return c; }
Method that should get next character, which has to be either specified character (usually end marker), OR, any character as long as there' at least one space character before it.
/** * Method that should get next character, which has to be either specified * character (usually end marker), OR, any character as long as there' * at least one space character before it. */
private final int getWsOrChar(int ok) throws IOException, XMLStreamException { int c = getNext(); if (c == ok) { return c; } if (c > XmlConsts.CHAR_SPACE) { reportUnexpectedChar(c, "; expected either '"+((char) ok)+"' or white space"); } if (c == XmlConsts.CHAR_LF || c == XmlConsts.CHAR_CR) { // Need to push it back to be processed properly pushback(); } return getNextAfterWs(false); } /* /********************************************************************** /* Abstract parsing methods for sub-classes to implement /********************************************************************** */ protected abstract void pushback(); protected abstract int getNext() throws IOException, XMLStreamException; protected abstract int getNextAfterWs(boolean reqWs) throws IOException, XMLStreamException;
Returns:First character that does not match expected, if any; CHAR_NULL if match succeeded
/** * @return First character that does not match expected, if any; * CHAR_NULL if match succeeded */
protected abstract int checkKeyword(String exp) throws IOException, XMLStreamException; protected abstract int readQuotedValue(char[] kw, int quoteChar) throws IOException, XMLStreamException; protected abstract Location getLocation(); /* /********************************************************************** /* Error reporting /********************************************************************** */ protected void reportXmlProblem(String msg) throws XMLStreamException { throw new WFCException(msg, getLocation()); } protected void reportNull() throws XMLStreamException { reportXmlProblem("Illegal null byte/char in input stream"); } protected void reportEof() throws XMLStreamException { reportXmlProblem("Unexpected end-of-input in xml declaration"); } protected void reportUnexpectedChar(int i, String msg) throws XMLStreamException { String excMsg; if (Character.isISOControl((char) i)) { excMsg = "Unexpected character (CTRL-CHAR, code "+i+")"+msg; } else { excMsg = "Unexpected character '"+((char) i)+"' (code "+i+")"+msg; } reportXmlProblem(excMsg); } protected final void reportPseudoAttrProblem(String attrName, String got, String expVal1, String expVal2) throws XMLStreamException { String expStr = (expVal1 == null) ? "" : ("; expected \""+expVal1+"\" or \""+expVal2+"\""); if (got == null || got.length() == 0) { reportXmlProblem("Missing XML pseudo-attribute '"+attrName+"' value"+expStr); } reportXmlProblem("Invalid XML pseudo-attribute '"+attrName+"' value "+got+expStr); } }