package com.fasterxml.jackson.dataformat.csv.impl;
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.JsonParser.NumberType;
import com.fasterxml.jackson.core.json.JsonReadContext;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
Low-level helper class that handles actual reading of CSV,
purely based on indexes given without worrying about reordering etc.
/**
* Low-level helper class that handles actual reading of CSV,
* purely based on indexes given without worrying about reordering etc.
*/
public class CsvDecoder
{
// Character codes that get special treatment while tokenizing input
// (space, the two linefeed characters, and the comment marker)
private final static int INT_SPACE = 0x0020;
private final static int INT_CR = '\r';
private final static int INT_LF = '\n';
private final static int INT_HASH = '#';
/*
/**********************************************************************
/* Input handling, configuration
/**********************************************************************
*/
Unfortunate back reference, needed for error reporting
/**
* Unfortunate back reference, needed for error reporting
*/
final protected CsvParser _owner;
I/O context for this reader. It handles buffer allocation
for the reader.
/**
* I/O context for this reader. It handles buffer allocation
* for the reader.
*/
final protected IOContext _ioContext;
Input stream that can be used for reading more content, if one
in use. May be null, if input comes just as a full buffer,
or if the stream has been closed.
/**
* Reader that can be used for reading more content, if one is
* in use. May be null, if input comes just as a full buffer,
* or if the source has been closed.
*/
protected Reader _inputSource;
Flag that indicates whether the input buffer is recycable (and
needs to be returned to recycler once we are done) or not.
If it is not, it also means that parser can NOT modify underlying
buffer.
/**
* Flag that indicates whether the input buffer is recyclable (and
* needs to be returned to recycler once we are done) or not.
*<p>
* If it is not, it also means that parser can NOT modify underlying
* buffer.
*/
protected boolean _bufferRecyclable;
/**
* Flag that indicates whether the underlying {@code Reader} is to be closed
* when input is closed; initialized from {@code JsonParser.Feature.AUTO_CLOSE_SOURCE}.
*/
protected boolean _autoCloseInput;
Configuration flag that determines whether spaces surrounding
separator characters are to be automatically trimmed or not.
/**
* Configuration flag that determines whether spaces surrounding
* separator characters are to be automatically trimmed or not.
*/
protected boolean _trimSpaces;
/**
* Configuration flag that determines whether lines starting with
* the hash character are skipped as comments. Can be enabled either
* via parser features or via the schema.
*/
protected boolean _allowComments;
Since: 2.10.1
/**
* @since 2.10.1
*/
protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before
Maximum of quote character, linefeeds (\r and \n), escape character.
/**
* Maximum of separator character, quote character, linefeeds (\r and \n)
* and escape character. Characters above this value can be appended
* to output without any further checks.
*/
protected int _maxSpecialChar;
// Special characters, as defined by the active schema
protected int _separatorChar;
protected int _quoteChar;
protected int _escapeChar;
/*
/**********************************************************************
/* Input handling, state
/**********************************************************************
*/
Buffer that contains contents of all values after processing
of doubled-quotes, escaped characters.
/**
* Buffer that contains contents of all values after processing
* of doubled-quotes, escaped characters.
*/
protected final TextBuffer _textBuffer;
Current buffer from which data is read; generally data is read into
buffer from input source, but in some cases pre-loaded buffer
is handed to the parser.
/**
* Current buffer from which data is read; generally data is read into
* buffer from input source, but in some cases pre-loaded buffer
* is handed to the parser.
*/
protected char[] _inputBuffer;
Pointer to next available character in buffer
/**
* Pointer to next available character in buffer
*/
protected int _inputPtr = 0;
Index of character after last available one in the buffer.
/**
* Index of character after last available one in the buffer.
*/
protected int _inputEnd = 0;
Marker to indicate that a linefeed was encountered and now
needs to be handled (indicates end-of-record).
/**
* Marker to indicate that a linefeed was encountered and now
* needs to be handled (indicates end-of-record). Holds the linefeed
* character (\r or \n) that was seen, or 0 if nothing is pending.
* Value 1 is used as a special marker when input is closed or
* end-of-input has been reached.
*/
protected int _pendingLF = 0;
Flag that indicates whether parser is closed or not. Gets set when parser is either closed by explicit call (close
) or when end-of-input is reached.
/**
* Flag that indicates whether parser is closed or not. Gets
* set when parser is either closed by explicit call
* ({@link #close}) or when end-of-input is reached.
*/
protected boolean _closed;
/*
/**********************************************************************
/* Current input location information
/**********************************************************************
*/
Number of characters/bytes that were contained in previous blocks
(blocks that were already processed prior to the current buffer).
/**
* Number of characters/bytes that were contained in previous blocks
* (blocks that were already processed prior to the current buffer).
*/
protected long _currInputProcessed = 0L;
Current row location of current point in input buffer, starting
from 1, if available.
/**
* Current row location of current point in input buffer, starting
* from 1, if available.
*/
protected int _currInputRow = 1;
Current index of the first character of the current row in input
buffer. Needed to calculate column position, if necessary; benefit
of not having column itself is that this only has to be updated
once per line.
/**
* Current index of the first character of the current row in input
* buffer. Needed to calculate column position, if necessary; benefit
* of not having column itself is that this only has to be updated
* once per line.
*/
protected int _currInputRowStart = 0;
// // // Location info at point when current token was started
Total number of bytes/characters read before start of current token.
For big (gigabyte-sized) sizes are possible, needs to be long,
unlike pointers and sizes related to in-memory buffers.
/**
* Total number of bytes/characters read before start of current token.
* Since big (gigabyte-sized) inputs are possible, this needs to be a long,
* unlike pointers and sizes related to in-memory buffers.
*/
protected long _tokenInputTotal = 0;
Input row on which current token starts, 1-based
/**
* Input row on which current token starts, 1-based
*/
protected int _tokenInputRow = 1;
Column on input row that current token starts; 0-based (although
in the end it'll be converted to 1-based)
/**
* Column on input row that current token starts; 0-based (although
* in the end it'll be converted to 1-based)
*/
protected int _tokenInputCol = 0;
/*
/**********************************************************************
/* Constants and fields of former 'JsonNumericParserBase'
/**********************************************************************
*/
// Bit flags stored in '_numTypesValid' to indicate which numeric
// representations of the current value are available
final protected static int NR_UNKNOWN = 0;
// First, integer types
final protected static int NR_INT = 0x0001;
final protected static int NR_LONG = 0x0002;
final protected static int NR_BIGINT = 0x0004;
// And then floating point types
final protected static int NR_DOUBLE = 0x008;
final protected static int NR_BIGDECIMAL = 0x0010;
// Also, we need some numeric constants: bounds of 'long' and 'int' value
// ranges expressed as BigDecimal, long and double, for range checks
final static BigDecimal BD_MIN_LONG = new BigDecimal(Long.MIN_VALUE);
final static BigDecimal BD_MAX_LONG = new BigDecimal(Long.MAX_VALUE);
// NOTE: int bounds must be based on Integer (not Long) limits; otherwise
// out-of-range values would pass int range checks
final static BigDecimal BD_MIN_INT = new BigDecimal(Integer.MIN_VALUE);
final static BigDecimal BD_MAX_INT = new BigDecimal(Integer.MAX_VALUE);
final static long MIN_INT_L = Integer.MIN_VALUE;
final static long MAX_INT_L = Integer.MAX_VALUE;
// These are not very accurate, but have to do... (for bounds checks)
final static double MIN_LONG_D = Long.MIN_VALUE;
final static double MAX_LONG_D = Long.MAX_VALUE;
final static double MIN_INT_D = Integer.MIN_VALUE;
final static double MAX_INT_D = Integer.MAX_VALUE;
// Digits, numeric: character codes of decimal digits
final protected static int INT_0 = '0';
final protected static int INT_1 = '1';
final protected static int INT_2 = '2';
final protected static int INT_3 = '3';
final protected static int INT_4 = '4';
final protected static int INT_5 = '5';
final protected static int INT_6 = '6';
final protected static int INT_7 = '7';
final protected static int INT_8 = '8';
final protected static int INT_9 = '9';
// Sign, decimal point and exponent markers
final protected static int INT_MINUS = '-';
final protected static int INT_PLUS = '+';
final protected static int INT_DECIMAL_POINT = '.';
final protected static int INT_e = 'e';
final protected static int INT_E = 'E';
// The NUL character
final protected static char CHAR_NULL = '\0';
// Numeric value holders: multiple fields used for
// for efficiency
Bitfield that indicates which numeric representations
have been calculated for the current type
/**
* Bitfield that indicates which numeric representations
* have been calculated for the current value; combination of
* {@code NR_} flags, or {@code NR_UNKNOWN} if nothing has been
* parsed yet.
*/
protected int _numTypesValid = NR_UNKNOWN;
// First primitives
protected int _numberInt;
protected long _numberLong;
protected double _numberDouble;
// And then object types
protected BigInteger _numberBigInt;
protected BigDecimal _numberBigDecimal;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Constructs a decoder that reads CSV content from the given reader,
 * allocating its input buffer from the given I/O context.
 *
 * @param owner Parser that uses this decoder; needed for error reporting
 * @param ctxt I/O context used for allocating the input buffer
 * @param r Reader from which content is read
 * @param schema Schema that defines separator, quote and escape characters
 * @param textBuffer Buffer into which decoded column values are collected
 * @param stdFeatures Bit mask of enabled {@code JsonParser.Feature}s
 * @param csvFeatures Bit mask of enabled {@code CsvParser.Feature}s
 */
public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema,
        TextBuffer textBuffer,
        int stdFeatures, int csvFeatures)
{
    _owner = owner;
    _ioContext = ctxt;
    _inputSource = r;
    _textBuffer = textBuffer;
    _autoCloseInput = JsonParser.Feature.AUTO_CLOSE_SOURCE.enabledIn(stdFeatures);
    // comments may be enabled either via legacy (YAML comments) setting or CSV-specific one
    final boolean oldComments = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures);
    _allowComments = oldComments | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
    _trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
    _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
    _inputBuffer = ctxt.allocTokenBuffer();
    _bufferRecyclable = true; // since we allocated it
    // no token has been started yet: -1 means "not available"
    _tokenInputRow = -1;
    _tokenInputCol = -1;
    setSchema(schema);
}
/**
 * Takes separator, quote and escape characters from the given schema,
 * enables comment handling if the schema allows comments, and
 * recalculates the highest character code that needs special handling.
 *
 * @param schema Schema to take the settings from
 */
public void setSchema(CsvSchema schema)
{
    _separatorChar = schema.getColumnSeparator();
    _quoteChar = schema.getQuoteChar();
    _escapeChar = schema.getEscapeChar();
    // comments stay enabled if they already were; schema can only turn them on
    _allowComments = _allowComments || schema.allowsComments();

    final int maxOfSchemaChars = Math.max(Math.max(_separatorChar, _quoteChar), _escapeChar);
    final int maxOfLinefeeds = Math.max('\r', '\n');
    _maxSpecialChar = Math.max(maxOfSchemaChars, maxOfLinefeeds);
}
Since: 2.7
/**
 * Re-applies CSV-specific feature flags: space trimming and blank line
 * skipping are set from the given flags, whereas comment handling can
 * only be turned on (never off) by this call.
 *
 * @param csvFeatures Bit mask of enabled {@code CsvParser.Feature}s
 *
 * @since 2.7
 */
public void overrideFormatFeatures(int csvFeatures) {
    _trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
    _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
    // 07-Oct-2019, tatu: not 100% accurate, as we have no access to legacy
    //    setting. But close enough, fixed in 3.0
    _allowComments |= CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
}
/*
/**********************************************************************
/* JsonParser implementations passed-through by CsvParser
/**********************************************************************
*/
/**
 * @return Reader that content is being read from; null if there is none
 *    or it has been closed
 */
public Object getInputSource()
{
    final Object source = _inputSource;
    return source;
}
/**
 * @return True if this decoder has been closed by a call to {@code close()}
 */
public boolean isClosed()
{
    return _closed;
}
/**
 * Closes this decoder: on first call closes the input and releases
 * internal buffers; further calls do nothing beyond re-setting
 * the end-of-input marker.
 */
public void close() throws IOException
{
    _pendingLF = 1; // just to ensure we'll also check _closed flag later on
    if (_closed) {
        return;
    }
    _closed = true;
    try {
        _closeInput();
    } finally {
        // Also, internal buffer(s) can now be released as well
        releaseBuffers();
    }
}
/**
 * Writes all content that has been read into the input buffer but not
 * yet consumed into the given writer, and marks that content as consumed
 * so that it is neither parsed nor released a second time.
 *
 * @param out Writer to write buffered content to
 *
 * @return Number of characters written; 0 if there was nothing buffered
 */
public int releaseBuffered(Writer out) throws IOException
{
    final int count = _inputEnd - _inputPtr;
    if (count < 1) {
        return 0;
    }
    // let's just advance ptr to end: released content must not be used any more
    final int origPtr = _inputPtr;
    _inputPtr += count;
    out.write(_inputBuffer, origPtr, count);
    return count;
}
/**
 * Creates a child array context of the given context, located at
 * the current input row and (1-based) column.
 */
public JsonReadContext childArrayContext(JsonReadContext context)
{
    // column is 1-based
    return context.createChildArrayContext(_currInputRow,
            _inputPtr - _currInputRowStart + 1);
}
/**
 * Creates a child object context of the given context, located at
 * the current input row and (1-based) column.
 */
public JsonReadContext childObjectContext(JsonReadContext context)
{
    // column is 1-based
    return context.createChildObjectContext(_currInputRow,
            _inputPtr - _currInputRowStart + 1);
}
/**
 * @return Location of the start of the current token
 */
public JsonLocation getTokenLocation()
{
    final long charOffset = getTokenCharacterOffset();
    final int row = getTokenLineNr();
    final int column = getTokenColumnNr();
    return new JsonLocation(_inputSource, charOffset, row, column);
}
/**
 * @return Location of the current read position within input
 */
public JsonLocation getCurrentLocation()
{
    // One twist: when dealing with a "pending LF", need to go back one
    // position when calculating location (1 is used as marker for end-of-input)
    final int ptr = (_pendingLF > 1) ? (_inputPtr - 1) : _inputPtr;
    final int column = ptr - _currInputRowStart + 1; // 1-based
    final long charOffset = _currInputProcessed + ptr - 1;
    return new JsonLocation(_inputSource, charOffset, _currInputRow, column);
}
/**
 * @return Row of the current read position
 */
public final int getCurrentRow()
{
    final int row = _currInputRow;
    return row;
}
/**
 * @return Column (1-based) of the current read position
 */
public final int getCurrentColumn()
{
    // One twist: when dealing with a "pending LF", need to go back one
    // position when calculating location (1 is used as marker for end-of-input)
    final int ptr = (_pendingLF > 1) ? (_inputPtr - 1) : _inputPtr;
    return ptr - _currInputRowStart + 1; // 1-based
}
/*
/**********************************************************************
/* Helper methods, input handling
/**********************************************************************
*/
/**
 * @return Number of characters read before start of the current token
 */
protected final long getTokenCharacterOffset()
{
    return _tokenInputTotal;
}
/**
 * @return Input row on which the current token starts
 */
protected final int getTokenLineNr()
{
    return _tokenInputRow;
}
/**
 * @return Column (1-based) at which the current token starts; or negative
 *   value if not available
 */
protected final int getTokenColumnNr()
{
    final int col = _tokenInputCol;
    // note: value of -1 means "not available"; pass negative values as is
    if (col < 0) {
        return col;
    }
    // otherwise convert from 0-based to 1-based
    return col + 1;
}
/**
 * Releases buffers of the text buffer, and hands the input buffer (if
 * still held) back to the I/O context.
 */
protected void releaseBuffers() throws IOException
{
    _textBuffer.releaseBuffers();
    final char[] toRelease = _inputBuffer;
    if (toRelease == null) {
        return;
    }
    // clear the reference first so the buffer can not be released twice
    _inputBuffer = null;
    _ioContext.releaseTokenBuffer(toRelease);
}
/**
 * Marks end-of-input, and drops the reference to the input source;
 * the source itself is only closed if auto-closing is enabled or the
 * I/O context reports it as managed.
 */
protected void _closeInput() throws IOException
{
    _pendingLF = 1; // just to ensure we'll also check _closed flag later on
    final Reader source = _inputSource;
    if (source == null) {
        return;
    }
    /* 25-Nov-2008, tatus: As per [JACKSON-16] we are not to call close()
     *   on the underlying Reader, unless we "own" it, or auto-closing
     *   feature is enabled.
     *   One downside is that when using our optimized
     *   Reader (granted, we only do that for UTF-32...) this
     *   means that buffer recycling won't work correctly.
     */
    if (_autoCloseInput || _ioContext.isResourceManaged()) {
        source.close();
    }
    _inputSource = null;
}
/**
 * Tries to fill the input buffer with more content from the input source,
 * adjusting location bookkeeping for the content that is replaced.
 *
 * @return True if at least one more character was read; false if there is
 *   no input source or end of input was reached (in which case input
 *   is closed)
 *
 * @throws IOException if reading fails, or if the reader returns 0 characters
 */
protected final boolean loadMore() throws IOException
{
    final int processed = _inputEnd;
    _currInputProcessed += processed;
    _currInputRowStart -= processed;

    if (_inputSource == null) {
        return false;
    }
    final int count = _inputSource.read(_inputBuffer, 0, _inputBuffer.length);
    _inputEnd = count;
    if (count > 0) {
        _inputPtr = 0;
        return true;
    }
    /* End of input; close here --  but note, do NOT yet call releaseBuffers()
     * as there may be buffered input to handle
     */
    _closeInput();
    // Should never return 0, so let's fail
    if (count == 0) {
        throw new IOException("InputStream.read() returned 0 characters when trying to read "+_inputBuffer.length+" bytes");
    }
    return false;
}
/*
/**********************************************************************
/* Actual parsing, access methods
/**********************************************************************
*/
/**
 * @return Current contents of the text buffer, as a String
 */
public String getText()
{
    final String text = _textBuffer.contentsAsString();
    return text;
}
Method that can be called to see if there is at least one more
character to be parsed.
/**
 * Method that can be called to see if there is at least one more
 * character to be parsed; loads more content into the input buffer
 * if the buffered content has been used up.
 */
public boolean hasMoreInput() throws IOException
{
    return (_inputPtr < _inputEnd) || loadMore();
}
Method called to handle details of starting a new line, which may
include skipping a linefeed.
Returns: True if there is a new data line to handle; false if not
/**
 * Method called to handle details of starting a new line, which may
 * include skipping a linefeed, as well as skipping of blank and
 * comment lines (if enabled).
 *
 * @return True if there is a new data line to handle; false if not
 */
public boolean startNewLine() throws IOException
{
    final boolean linefeedPending = (_pendingLF != 0);
    if (linefeedPending) {
        // marker may also indicate closed input, in which case there is no more content
        if (_inputSource == null) {
            return false;
        }
        // first: if pending LF, skip it
        _handleLF();
    }
    return skipLinesWhenNeeded();
}
optionally skip lines that are empty or are comments, depending on the feature activated in the parser
Throws: Returns: false if the end of input was reached Since: 2.10.1
/**
* Optionally skips lines that are empty (or contain only spaces) or are comments,
* depending on the features activated in the parser. If neither comment handling
* nor blank line skipping is enabled, only checks whether more input is available.
* On return of {@code true}, the input pointer is positioned at the start of the
* first line that was not skipped.
*<p>
* NOTE(review): {@code firstCharacterPtr} is an index into the input buffer; since
* {@code hasMoreInput()} may reload the buffer while a line is being scanned, the
* index may then refer to the newly loaded contents -- handling of lines that span
* a buffer reload should be confirmed.
*
* @return false if the end of input was reached
* @throws IOException
* @since 2.10.1
*/
public boolean skipLinesWhenNeeded() throws IOException {
// nothing to skip: just check for availability of input
if (!(_allowComments || _skipBlankLines)) {
return hasMoreInput();
}
// index of the first character of the line being inspected
int firstCharacterPtr = _inputPtr;
while (hasMoreInput()) {
char ch = _inputBuffer[_inputPtr++];
if (ch == '\r' || ch == '\n') {
// end of a skipped line: consume the linefeed (including \r\n pair)
_pendingLF = ch;
_handleLF();
// track the start of the new line
firstCharacterPtr = _inputPtr;
continue;
}
if (ch == ' ') {
// skip all blanks (in both comments/blanks skip mode)
continue;
}
if (_allowComments) {
if (_inputBuffer[firstCharacterPtr] == '#') {
// on a commented line, skip everything
continue;
}
if (ch == '#') {
// we reach this point when whitespaces precedes the hash character
// move the firstCharacterPtr to the '#' location in order to skip the line completely
firstCharacterPtr = _inputPtr-1;
continue;
}
}
// we reached a non skippable character, this line needs to be parsed
// rollback the input pointer to the beginning of the line
_inputPtr = firstCharacterPtr;
return true; // processing can go on
}
return false; // end of input
}
/**
 * Skips the rest of the current line, and then any following lines that
 * start with the hash character.
 *
 * @return First character of the first line that does not start with the
 *   hash character; or -1 if end of input was reached
 */
protected int _skipCommentLines() throws IOException
{
    while ((_inputPtr < _inputEnd) || loadMore()) {
        final char ch = _inputBuffer[_inputPtr++];
        // only linefeeds are of interest; anything else is part of the line to skip
        if (ch == '\r' || ch == '\n') {
            _pendingLF = ch;
            _handleLF();
            // Ok, skipped the end of the line. Check next one...
            final int next = _nextChar();
            if (next != INT_HASH) {
                return next;
            }
        }
    }
    return -1; // end of input
}
Method called to blindly skip a single line of content, without considering
aspects like quoting or escaping. Used currently simply to skip the first
line of input document, if instructed to do so.
/**
 * Method called to blindly skip a single line of content, without considering
 * aspects like quoting or escaping. Used currently simply to skip the first
 * line of input document, if instructed to do so.
 *
 * @return True if a linefeed was found and skipped; false if input ended
 *   (or was closed) before one was found
 */
public boolean skipLine() throws IOException
{
    if (_pendingLF != 0) {
        if (_inputSource == null) {
            return false;
        }
        _handleLF();
    }
    for (;;) {
        if (_inputPtr >= _inputEnd && !loadMore()) {
            return false;
        }
        final char c = _inputBuffer[_inputPtr++];
        if (c != '\r' && c != '\n') {
            continue;
        }
        // important: handle trailing linefeed now, so caller need not bother
        _pendingLF = c;
        _handleLF();
        return true;
    }
}
Method called to parse the next token when we don't have any type
information, so that all tokens are exposed as basic String
values.
Returns: Column value if more found; null to indicate end of line
of input
/**
 * Method called to parse the next token when we don't have any type
 * information, so that all tokens are exposed as basic String
 * values.
 *<p>
 * Also records the start location of the token, and leaves decoded
 * contents in the text buffer (including for empty values) so that
 * {@link #getText()} and numeric accessors refer to the value
 * returned here.
 *
 * @return Column value if more found; null to indicate end of line
 *   of input
 */
public String nextString() throws IOException
{
    _numTypesValid = NR_UNKNOWN;
    if (_pendingLF > 0) { // either pendingLF, or closed
        if (_inputSource != null) { // if closed, we just need to return null
            _handleLF();
        }
        return null; // end of line without new value
    }
    int i;
    if (_trimSpaces) {
        i = _skipLeadingSpace();
    } else {
        i = _nextChar();
    }
    // First, need to ensure we know the starting location of token
    _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
    _tokenInputRow = _currInputRow;
    _tokenInputCol = _inputPtr - _currInputRowStart - 1;
    if (i < 0) { // EOF at this point signifies empty value
        // must not leave contents of the previous value in the buffer
        _textBuffer.resetWithString("");
        return "";
    }
    if (i == INT_CR || i == INT_LF) { // end-of-line means end of record; but also need to handle LF later on
        _pendingLF = i;
        // must not leave contents of the previous value in the buffer
        _textBuffer.resetWithString("");
        return "";
    }
    // two modes: quoted, unquoted
    if (i == _quoteChar) { // offline quoted case (longer)
        return _nextQuotedString();
    }
    if (i == _separatorChar) {
        _textBuffer.resetWithString("");
        return "";
    }
    char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
    outBuf[0] = (char) i;
    int outPtr = 1;
    if (i == _escapeChar) {
        // Reset the escaped character
        outBuf[0] = _unescape();
        return _nextUnquotedString(outBuf, outPtr);
    }
    int ptr = _inputPtr;
    if (ptr >= _inputEnd) {
        if (!loadMore()) { // ok to have end-of-input but...
            return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
        }
        ptr = _inputPtr;
    }
    // number of characters that can be handled without running out of
    // either buffered input or room in the output segment
    final int end;
    {
        int max = Math.min(_inputEnd - ptr, outBuf.length - outPtr);
        end = ptr + max;
    }
    // handle unquoted case locally if it can be handled without
    // crossing buffer boundary...
    char[] inputBuffer = _inputBuffer;
    while (ptr < end) {
        char c = inputBuffer[ptr++];
        if (c <= _maxSpecialChar) {
            if (c == _separatorChar) { // end of value, yay!
                _inputPtr = ptr;
                return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
            }
            if (c == '\r' || c == '\n') {
                _pendingLF = c;
                _inputPtr = ptr;
                return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
            }
            if (c == _escapeChar) {
                // leave escape character to be handled by the general method
                --ptr;
                break;
            }
        }
        outBuf[outPtr++] = c;
    }
    // ok, either input or output across buffer boundary, offline
    _inputPtr = ptr;
    return _nextUnquotedString(outBuf, outPtr);
}
/**
 * Reads the next column value; currently always exposed as a String.
 *
 * @return {@code JsonToken.VALUE_STRING} if a value was read; null at end of line
 */
public JsonToken nextStringOrLiteral() throws IOException
{
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    final boolean endOfLine = (nextString() == null);
    return endOfLine ? null : JsonToken.VALUE_STRING;
}
/**
 * Reads the next column value; currently always exposed as a String.
 *
 * @return {@code JsonToken.VALUE_STRING} if a value was read; null at end of line
 */
public JsonToken nextNumber() throws IOException
{
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    final boolean endOfLine = (nextString() == null);
    return endOfLine ? null : JsonToken.VALUE_STRING;
}
/**
 * Reads the next column value; currently always exposed as a String.
 *
 * @return {@code JsonToken.VALUE_STRING} if a value was read; null at end of line
 */
public JsonToken nextNumberOrString() throws IOException
{
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    final boolean endOfLine = (nextString() == null);
    return endOfLine ? null : JsonToken.VALUE_STRING;
}
/*
/**********************************************************************
/* Actual parsing, private helper methods
/**********************************************************************
*/
/**
* Reads the rest of an unquoted value, continuing across input and output
* buffer boundaries as necessary. Value ends at separator, linefeed
* (which is left pending) or end of input; escaped characters are decoded.
*
* @param outBuf Current output segment, containing start of the value
* @param outPtr Number of characters already in {@code outBuf}
*
* @return Decoded value (trimmed, if space trimming is enabled)
*/
protected String _nextUnquotedString(char[] outBuf, int outPtr) throws IOException
{
int c;
final char[] inputBuffer = _inputBuffer;
main_loop:
while (true) {
int ptr = _inputPtr;
// out of buffered input? Need to load more
if (ptr >= _inputEnd) {
if (!loadMore()) { // ok to have end-of-input, are done
_inputPtr = ptr;
break main_loop;
}
ptr = _inputPtr;
}
// out of room in output? Need a new segment
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
// inner loop runs until either input or output buffer is exhausted
final int max = Math.min(_inputEnd, (ptr + (outBuf.length - outPtr)));
while (ptr < max) {
c = inputBuffer[ptr++];
if (c <= _maxSpecialChar) {
if (c == _separatorChar) { // end of value, yay!
_inputPtr = ptr;
break main_loop;
}
if (c == '\r' || c == '\n') { // end of line is end of value as well
_inputPtr = ptr;
_pendingLF = c;
break main_loop;
}
if (c == _escapeChar) {
_inputPtr = ptr;
outBuf[outPtr++] = _unescape();
// May have passed input boundary, need to re-set
continue main_loop;
}
}
outBuf[outPtr++] = (char) c;
}
_inputPtr = ptr;
}
return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
}
/**
 * Reads a quoted value, starting right after the opening quote character.
 * Handles doubled-up quotes, escaped characters and embedded linefeeds
 * (updating row information), and then skips trailing spaces up to
 * and including the separator; a trailing linefeed is left pending.
 *
 * @return Decoded contents of the quoted value (never trimmed)
 */
protected String _nextQuotedString() throws IOException
{
    char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
    int outPtr = 0;
    final char[] inputBuffer = _inputBuffer;
    boolean checkLF = false; // marker for split CR+LF

    main_loop:
    while (true) {
        int ptr = _inputPtr;
        if (ptr >= _inputEnd) {
            if (!loadMore()) { // not ok, missing end quote
                _owner._reportParsingError("Missing closing quote for value"); // should indicate start position?
            }
            ptr = _inputPtr;
        }
        if (checkLF) { // had a "hanging" CR in parse loop; check now
            // marker only applies to the character that follows the CR
            checkLF = false;
            if (inputBuffer[ptr] == '\n') {
                // undo earlier advancement, to keep line number correct
                // (row gets advanced again when the LF itself is handled)
                --_currInputRow;
            }
        }
        if (outPtr >= outBuf.length) {
            outBuf = _textBuffer.finishCurrentSegment();
            outPtr = 0;
        }
        final int max = Math.min(_inputEnd, (ptr + (outBuf.length - outPtr)));

        inner_loop:
        while (true) {
            char c = inputBuffer[ptr++];
            if (c <= _maxSpecialChar) {
                if (c == _quoteChar) {
                    _inputPtr = ptr;
                    break;
                }
                // Embedded linefeeds are fine
                if (c == '\r') {
                    // bit crappy check but has to do:
                    if (ptr >= max) {
                        // can not see the next character yet
                        checkLF = true; // will need to be checked in beginning of next loop
                        ++_currInputRow;
                        _currInputRowStart = ptr;
                    } else if (inputBuffer[ptr] != '\n') {
                        ++_currInputRow;
                        _currInputRowStart = ptr;
                    }
                } else if (c == '\n') {
                    ++_currInputRow;
                    _currInputRowStart = ptr;
                } else if (c == _escapeChar) {
                    _inputPtr = ptr;
                    c = _unescape();
                    outBuf[outPtr++] = c;
                    // May have passed input boundary, need to re-set
                    continue main_loop;
                }
            }
            outBuf[outPtr++] = c;
            if (ptr >= max) {
                _inputPtr = ptr;
                continue main_loop;
            }
            continue inner_loop;
        }
        // We get here if we hit a quote: check if it's doubled up, or end of value:
        if (_inputPtr < _inputEnd || loadMore()) {
            if (_inputBuffer[_inputPtr] == _quoteChar) { // doubled up, append
                // note: should have enough room, is safe
                outBuf[outPtr++] = (char) _quoteChar;
                ++_inputPtr;
                continue main_loop;
            }
        }
        // Not doubled; leave next char as is
        break;
    }
    // note: do NOT trim from within quoted Strings
    String result = _textBuffer.finishAndReturn(outPtr, false);

    // good, but we also need to locate and skip trailing space, separator
    // (note: space outside quotes never included, but must be skipped)
    while (_inputPtr < _inputEnd || loadMore()) { // end-of-input is fine
        int ch = _inputBuffer[_inputPtr++];
        if (ch == _separatorChar) { // common case, separator between columns
            break;
        }
        if (ch <= INT_SPACE) { // extra space, fine as well
            if (ch == INT_CR || ch == INT_LF) { // but end-of-line can't be yet skipped
                _pendingLF = ch;
                break;
            }
            continue;
        }
        // message describes the separator, so that is the character to show
        _owner._reportUnexpectedCsvChar(ch, String.format(
                "Expected separator (%s) or end-of-line", _getCharDesc(_separatorChar)));
    }
    return result;
}
/**
 * Completes handling of a pending linefeed: skips the LF of a CR+LF
 * pair if there is one, clears the pending marker and advances row
 * information.
 */
protected final void _handleLF() throws IOException
{
    // already skipped past first part; but may get \r\n so skip the other char too?
    if (_pendingLF == INT_CR
            && (_inputPtr < _inputEnd || loadMore())
            && _inputBuffer[_inputPtr] == '\n') {
        ++_inputPtr;
    }
    _pendingLF = 0;
    ++_currInputRow;
    _currInputRowStart = _inputPtr;
}
/**
 * Reads the character following an escape character and decodes it.
 *
 * @return Decoded character: NUL, linefeed, carriage return or tab for
 *   '0', 'n', 'r' and 't' respectively; any other character as is
 */
protected char _unescape() throws IOException
{
    if (_inputPtr >= _inputEnd && !loadMore()) {
        _reportError("Unexpected EOF in escaped character");
    }
    final char escaped = _inputBuffer[_inputPtr++];
    // Some characters are more special than others, so:
    switch (escaped) {
    case '0':
        return '\0';
    case 'n':
        return '\n';
    case 'r':
        return '\r';
    case 't':
        return '\t';
    default:
        // others, return as is...
        return escaped;
    }
}
/**
 * Reads the next input character, loading more input if necessary.
 *
 * @return Next character; or -1 if there is no more input
 */
protected final int _nextChar() throws IOException
{
    final boolean available = (_inputPtr < _inputEnd) || loadMore();
    if (!available) {
        return -1;
    }
    return _inputBuffer[_inputPtr++];
}
/**
 * Skips spaces and other characters up to and including space character,
 * except for linefeeds and the separator character.
 *
 * @return First character that was not skipped; or -1 if end of input
 *   was reached
 */
protected final int _skipLeadingSpace() throws IOException
{
    final int sep = _separatorChar;
    for (;;) {
        if (_inputPtr >= _inputEnd && !loadMore()) {
            return -1;
        }
        final char ch = _inputBuffer[_inputPtr++];
        // separator and linefeeds are significant even if below or at space
        if ((ch > ' ') || (ch == sep) || (ch == '\r') || (ch == '\n')) {
            return ch;
        }
    }
}
/*
/**********************************************************************
/* Numeric accessors for CsvParser
/**********************************************************************
*/
/**
 * Returns the current value as a number, parsing it first if necessary.
 *
 * @return Numeric value, using the first available representation in order
 *   of int, long, BigInteger, BigDecimal, double
 */
public Number getNumberValue() throws IOException
{
    if (_numTypesValid == NR_UNKNOWN) {
        _parseNumericValue(NR_UNKNOWN); // will also check event type
    }
    final Number result;
    // Separate types for int types
    if ((_numTypesValid & NR_INT) != 0) {
        result = Integer.valueOf(_numberInt);
    } else if ((_numTypesValid & NR_LONG) != 0) {
        result = Long.valueOf(_numberLong);
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        result = _numberBigInt;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        // And then floating point types. But here optimal type
        // needs to be big decimal, to avoid losing any data?
        result = _numberBigDecimal;
    } else {
        if ((_numTypesValid & NR_DOUBLE) == 0) { // sanity check
            _throwInternal();
        }
        result = Double.valueOf(_numberDouble);
    }
    return result;
}
/**
 * Returns the numeric type of the current value, parsing it first if necessary.
 *
 * @return Type of the first available representation in order of int, long,
 *   BigInteger, BigDecimal; {@code NumberType.DOUBLE} if none of those
 */
public NumberType getNumberType() throws IOException
{
    if (_numTypesValid == NR_UNKNOWN) {
        _parseNumericValue(NR_UNKNOWN); // will also check event type
    }
    final NumberType type;
    if ((_numTypesValid & NR_INT) != 0) {
        type = NumberType.INT;
    } else if ((_numTypesValid & NR_LONG) != 0) {
        type = NumberType.LONG;
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        type = NumberType.BIG_INTEGER;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        // And then floating point types. Here optimal type
        // needs to be big decimal, to avoid losing any data?
        // However... using BD is slow, so let's allow returning
        // double as type if no explicit call has been made to access data as BD?
        type = NumberType.BIG_DECIMAL;
    } else {
        type = NumberType.DOUBLE;
    }
    return type;
}
/**
 * @return Current value as an int; parsed and/or converted from
 *   another numeric representation if necessary
 */
public int getIntValue() throws IOException
{
    if ((_numTypesValid & NR_INT) != 0) {
        return _numberInt;
    }
    if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
        _parseNumericValue(NR_INT); // will also check event type
    }
    if ((_numTypesValid & NR_INT) == 0) { // wasn't an int natively?
        convertNumberToInt(); // let's make it so, if possible
    }
    return _numberInt;
}
/**
 * @return Current value as a long; parsed and/or converted from
 *   another numeric representation if necessary
 */
public long getLongValue() throws IOException
{
    if ((_numTypesValid & NR_LONG) != 0) {
        return _numberLong;
    }
    if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
        _parseNumericValue(NR_LONG);
    }
    if ((_numTypesValid & NR_LONG) == 0) { // wasn't a long natively?
        convertNumberToLong();
    }
    return _numberLong;
}
/**
 * @return Current value as a BigInteger; parsed and/or converted from
 *   another numeric representation if necessary
 */
public BigInteger getBigIntegerValue() throws IOException
{
    if ((_numTypesValid & NR_BIGINT) != 0) {
        return _numberBigInt;
    }
    if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
        _parseNumericValue(NR_BIGINT);
    }
    if ((_numTypesValid & NR_BIGINT) == 0) { // wasn't a BigInteger natively?
        convertNumberToBigInteger();
    }
    return _numberBigInt;
}
/**
 * @return Current value as a float, obtained by narrowing the double value
 */
public float getFloatValue() throws IOException
{
    // Bounds/range checks would be tricky here, so let's not bother...
    return (float) getDoubleValue();
}
/**
 * @return Current value as a double; parsed and/or converted from
 *   another numeric representation if necessary
 */
public double getDoubleValue() throws IOException
{
    if ((_numTypesValid & NR_DOUBLE) != 0) {
        return _numberDouble;
    }
    if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
        _parseNumericValue(NR_DOUBLE);
    }
    if ((_numTypesValid & NR_DOUBLE) == 0) { // wasn't a double natively?
        convertNumberToDouble();
    }
    return _numberDouble;
}
/**
 * @return Current value as a BigDecimal; parsed and/or converted from
 *   another numeric representation if necessary
 */
public BigDecimal getDecimalValue() throws IOException
{
    if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        return _numberBigDecimal;
    }
    if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
        _parseNumericValue(NR_BIGDECIMAL);
    }
    if ((_numTypesValid & NR_BIGDECIMAL) == 0) { // wasn't a BigDecimal natively?
        convertNumberToBigDecimal();
    }
    return _numberBigDecimal;
}
/*
/**********************************************************************
/* Conversion from textual to numeric representation
/**********************************************************************
*/
Method that will parse actual numeric value out of a syntactically
valid number value. Type it will parse into depends on whether
it is a floating point number, as well as its magnitude: smallest
legal type (of ones available) is used for efficiency.
Params: - expType – Numeric type that we will immediately need, if any;
mostly necessary to optimize handling of floating point numbers
/**
* Method that will parse actual numeric value out of a syntactically
* valid number value. Type it will parse into depends on whether
* it is a floating point number, as well as its magnitude: smallest
* legal type (of ones available) is used for efficiency.
*<p>
* Contents of the text buffer that look like an integer are parsed as
* int, long or BigInteger; everything else is handled as a floating
* point value.
*
* @param expType Numeric type that we will immediately need, if any;
*   mostly necessary to optimize handling of floating point numbers
*/
protected void _parseNumericValue(int expType)
throws IOException
{
// Int or float?
if (_textBuffer.looksLikeInt()) {
char[] buf = _textBuffer.getTextBuffer();
int offset = _textBuffer.getTextOffset();
char c = buf[offset];
boolean neg;
// skip the sign, if any, so that only digits remain
if (c == '-') {
neg = true;
++offset;
} else {
neg = false;
if (c == '+') {
++offset;
}
}
// NOTE(review): length is derived from the length of the array, which presumably
// relies on the returned array containing nothing but the value -- confirm against TextBuffer
int len = buf.length - offset;
if (len <= 9) { // definitely fits in int
int i = NumberInput.parseInt(buf, offset, len);
_numberInt = neg ? -i : i;
_numTypesValid = NR_INT;
return;
}
if (len <= 18) { // definitely fits AND is easy to parse using 2 int parse calls
long l = NumberInput.parseLong(buf, offset, len);
if (neg) {
l = -l;
}
// [JACKSON-230] Could still fit in int, need to check
if (len == 10) {
if (neg) {
if (l >= MIN_INT_L) {
_numberInt = (int) l;
_numTypesValid = NR_INT;
return;
}
} else {
if (l <= MAX_INT_L) {
_numberInt = (int) l;
_numTypesValid = NR_INT;
return;
}
}
}
_numberLong = l;
_numTypesValid = NR_LONG;
return;
}
// 19 digits or more: may or may not fit in a long
_parseSlowIntValue(expType, buf, offset, len, neg);
return;
}
/*
if (_hasFloatToken() == JsonToken.VALUE_NUMBER_FLOAT) {
_parseSlowFloatValue(expType);
return;
}
_reportError("Current token ("+_currToken+") not numeric, can not use numeric value accessors");
*/
_parseSlowFloatValue(expType);
}
/**
 * Parses the buffered text as a floating-point value.
 *<p>
 * The trade-off is between accurate but slow ({@link java.math.BigDecimal})
 * and lossy but fast ({@code double}). {@code BigDecimal} is only used when
 * explicitly requested via {@code expType}; it can still be constructed
 * later since the textual representation is retained.
 *
 * @param expType numeric type the caller needs; {@code NR_BIGDECIMAL}
 *   selects decimal parsing, anything else yields a {@code double}
 *
 * @throws IOException if the text can not be parsed as a number
 */
private final void _parseSlowFloatValue(int expType)
    throws IOException
{
    final boolean wantDecimal = (expType == NR_BIGDECIMAL);
    try {
        if (!wantDecimal) {
            // Default case: double has to do
            _numberDouble = _textBuffer.contentsAsDouble();
            _numTypesValid = NR_DOUBLE;
            return;
        }
        _numberBigDecimal = _textBuffer.contentsAsDecimal();
        _numTypesValid = NR_BIGDECIMAL;
    } catch (NumberFormatException nex) {
        // Can this ever occur? Due to overflow, maybe?
        throw constructError("Malformed numeric value '"+_textBuffer.contentsAsString()+"'", nex);
    }
}
/**
 * Handles integral values too long for the fast paths: the value is stored
 * as a {@code long} when {@code NumberInput.inLongRange} accepts it, and as
 * a {@link BigInteger} otherwise. Parsing is done from the String form of
 * the buffered text.
 *
 * @param expType numeric type the caller needs (not consulted here)
 * @param buf buffer holding the digits
 * @param offset index of the first digit in {@code buf} (sign excluded)
 * @param len number of digits
 * @param neg whether the value had a leading minus sign
 *
 * @throws IOException if the text can not be parsed as a number
 */
private final void _parseSlowIntValue(int expType, char[] buf, int offset, int len,
        boolean neg)
    throws IOException
{
    final String text = _textBuffer.contentsAsString();
    try {
        if (!NumberInput.inLongRange(buf, offset, len, neg)) {
            // Rare case: does not fit in long, need the heavy guns
            _numberBigInt = new BigInteger(text);
            _numTypesValid = NR_BIGINT;
            return;
        }
        // Probably faster to construct a String, call parse, than to use BigInteger
        _numberLong = Long.parseLong(text);
        _numTypesValid = NR_LONG;
    } catch (NumberFormatException nex) {
        // Can this ever occur? Due to overflow, maybe?
        throw constructError("Malformed numeric value '"+text+"'", nex);
    }
}
/*
/**********************************************************************
/* Numeric conversions
/**********************************************************************
*/
/**
 * Derives the {@code int} representation of the current number from
 * whichever other representation is already valid (checked in the order
 * long, BigInteger, double, BigDecimal) and marks {@code NR_INT} as valid.
 *<p>
 * Every source representation is range-checked; a value that does not fit
 * in an {@code int} is reported as an error instead of being silently
 * truncated.
 *
 * @throws IOException if the value is outside of the {@code int} range
 */
protected void convertNumberToInt() throws IOException
{
    // First, converting from long ought to be easy
    if ((_numTypesValid & NR_LONG) != 0) {
        // Let's verify it's lossless conversion by simple roundtrip
        int result = (int) _numberLong;
        if (result != _numberLong) {
            _reportError("Numeric value ("+getText()+") out of range of int");
        }
        _numberInt = result;
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        // bitLength() excludes the sign bit, so the value fits in an int
        // exactly when it needs at most 31 bits; intValue() alone would
        // silently drop the high-order bits.
        if (_numberBigInt.bitLength() > 31) {
            reportOverflowInt();
        }
        _numberInt = _numberBigInt.intValue();
    } else if ((_numTypesValid & NR_DOUBLE) != 0) {
        // Need to check boundaries
        if (_numberDouble < MIN_INT_D || _numberDouble > MAX_INT_D) {
            reportOverflowInt();
        }
        _numberInt = (int) _numberDouble;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        if (BD_MIN_INT.compareTo(_numberBigDecimal) > 0
                || BD_MAX_INT.compareTo(_numberBigDecimal) < 0) {
            reportOverflowInt();
        }
        _numberInt = _numberBigDecimal.intValue();
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_INT;
}
/**
 * Derives the {@code long} representation of the current number from
 * whichever other representation is already valid (checked in the order
 * int, BigInteger, double, BigDecimal) and marks {@code NR_LONG} as valid.
 *<p>
 * Every source representation that can exceed the {@code long} range is
 * range-checked; a value that does not fit is reported as an error instead
 * of being silently truncated.
 *
 * @throws IOException if the value is outside of the {@code long} range
 */
protected void convertNumberToLong() throws IOException
{
    if ((_numTypesValid & NR_INT) != 0) {
        _numberLong = _numberInt;
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        // bitLength() excludes the sign bit, so the value fits in a long
        // exactly when it needs at most 63 bits; longValue() alone would
        // silently drop the high-order bits.
        if (_numberBigInt.bitLength() > 63) {
            reportOverflowLong();
        }
        _numberLong = _numberBigInt.longValue();
    } else if ((_numTypesValid & NR_DOUBLE) != 0) {
        // Need to check boundaries
        if (_numberDouble < MIN_LONG_D || _numberDouble > MAX_LONG_D) {
            reportOverflowLong();
        }
        _numberLong = (long) _numberDouble;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        if (BD_MIN_LONG.compareTo(_numberBigDecimal) > 0
                || BD_MAX_LONG.compareTo(_numberBigDecimal) < 0) {
            reportOverflowLong();
        }
        _numberLong = _numberBigDecimal.longValue();
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_LONG;
}
/**
 * Derives the {@link BigInteger} representation of the current number from
 * whichever other representation is already valid (checked in the order
 * BigDecimal, long, int, double) and marks {@code NR_BIGINT} as valid.
 * Fractional parts are truncated; no range errors are reported.
 *
 * @throws IOException declared for symmetry with the other converters
 */
protected void convertNumberToBigInteger()
    throws IOException
{
    final int known = _numTypesValid;
    final BigInteger converted;

    if ((known & NR_BIGDECIMAL) != 0) {
        // here it'll just get truncated, no exceptions thrown
        converted = _numberBigDecimal.toBigInteger();
    } else if ((known & NR_LONG) != 0) {
        converted = BigInteger.valueOf(_numberLong);
    } else if ((known & NR_INT) != 0) {
        converted = BigInteger.valueOf(_numberInt);
    } else if ((known & NR_DOUBLE) != 0) {
        final BigDecimal viaDecimal = BigDecimal.valueOf(_numberDouble);
        converted = viaDecimal.toBigInteger();
    } else {
        _throwInternal(); // should never get here
        return;
    }
    _numberBigInt = converted;
    _numTypesValid |= NR_BIGINT;
}
/**
 * Derives the {@code double} representation of the current number from
 * whichever other representation is already valid and marks
 * {@code NR_DOUBLE} as valid.
 *<p>
 * Important: the lookup MUST start with the more accurate representations
 * (BigDecimal, then BigInteger, long, int), since it is not known which
 * value is the original one; the others get generated when requested.
 *
 * @throws IOException declared for symmetry with the other converters
 */
protected void convertNumberToDouble()
    throws IOException
{
    final int known = _numTypesValid;
    final double converted;

    if ((known & NR_BIGDECIMAL) != 0) {
        converted = _numberBigDecimal.doubleValue();
    } else if ((known & NR_BIGINT) != 0) {
        converted = _numberBigInt.doubleValue();
    } else if ((known & NR_LONG) != 0) {
        converted = _numberLong;
    } else if ((known & NR_INT) != 0) {
        converted = _numberInt;
    } else {
        _throwInternal(); // should never get here
        return;
    }
    _numberDouble = converted;
    _numTypesValid |= NR_DOUBLE;
}
/**
 * Derives the {@link BigDecimal} representation of the current number from
 * whichever other representation is already valid (checked in the order
 * double, BigInteger, long, int) and marks {@code NR_BIGDECIMAL} as valid.
 *
 * @throws IOException if obtaining the token text fails
 */
protected void convertNumberToBigDecimal() throws IOException
{
    final int known = _numTypesValid;
    final BigDecimal converted;

    if ((known & NR_DOUBLE) != 0) {
        // Parse from the String representation rather than the double, to
        // avoid rounding errors that non-decimal floating operations could incur
        converted = new BigDecimal(getText());
    } else if ((known & NR_BIGINT) != 0) {
        converted = new BigDecimal(_numberBigInt);
    } else if ((known & NR_LONG) != 0) {
        converted = BigDecimal.valueOf(_numberLong);
    } else if ((known & NR_INT) != 0) {
        converted = BigDecimal.valueOf(_numberInt);
    } else {
        _throwInternal(); // should never get here
        return;
    }
    _numberBigDecimal = converted;
    _numTypesValid |= NR_BIGDECIMAL;
}
/*
/**********************************************************
/* Number handling exceptions
/**********************************************************
*/
/**
 * Reports a character that is not legal inside a numeric value.
 *
 * @param ch offending character code
 * @param comment optional extra explanation appended to the message;
 *   may be {@code null}
 *
 * @throws JsonParseException always
 */
protected void reportUnexpectedNumberChar(int ch, String comment)
    throws JsonParseException
{
    final StringBuilder text = new StringBuilder();
    text.append("Unexpected character (").append(_getCharDesc(ch)).append(") in numeric value");
    if (comment != null) {
        text.append(": ").append(comment);
    }
    _reportError(text.toString());
}
/**
 * Reports an invalid numeric value, prefixing the given detail message.
 *
 * @param msg description of what is wrong with the number
 *
 * @throws JsonParseException always
 */
protected void reportInvalidNumber(String msg) throws JsonParseException
{
    final String fullMessage = "Invalid numeric value: "+msg;
    _reportError(fullMessage);
}
/**
 * Reports that the current numeric value does not fit in an {@code int};
 * the message includes the token text and the legal range.
 *
 * @throws IOException always (as a parse error), or if obtaining the
 *   token text fails
 */
protected void reportOverflowInt() throws IOException
{
    final StringBuilder text = new StringBuilder();
    text.append("Numeric value (").append(getText());
    text.append(") out of range of int (").append(Integer.MIN_VALUE);
    text.append(" - ").append(Integer.MAX_VALUE).append(")");
    _reportError(text.toString());
}
/**
 * Reports that the current numeric value does not fit in a {@code long};
 * the message includes the token text and the legal range.
 *
 * @throws IOException always (as a parse error), or if obtaining the
 *   token text fails
 */
protected void reportOverflowLong() throws IOException
{
    final StringBuilder text = new StringBuilder();
    text.append("Numeric value (").append(getText());
    text.append(") out of range of long (").append(Long.MIN_VALUE);
    text.append(" - ").append(Long.MAX_VALUE).append(")");
    _reportError(text.toString());
}
/**
 * Builds (but does not throw) a parse exception tied to the owning parser.
 *
 * @param msg failure description
 * @param t underlying cause
 *
 * @return newly constructed exception for the caller to throw
 */
protected final JsonParseException constructError(String msg, Throwable t)
{
    final JsonParseException failure = new JsonParseException(_owner, msg, t);
    return failure;
}
/**
 * Produces a printable description of a character code for use in error
 * messages: ISO control characters are described by code only, other
 * characters are shown quoted along with their decimal code, and codes
 * above 255 additionally get their hexadecimal code.
 *
 * @param ch character code to describe
 *
 * @return human-readable description of {@code ch}
 */
protected final static String _getCharDesc(int ch)
{
    final char asChar = (char) ch;
    if (Character.isISOControl(asChar)) {
        return "(CTRL-CHAR, code "+ch+")";
    }
    // Hex form is only included for codes outside the 8-bit range
    final String hexSuffix = (ch > 255) ? (" / 0x"+Integer.toHexString(ch)) : "";
    return "'"+asChar+"' (code "+ch+hexSuffix+")";
}
/**
 * Signals that a code path believed to be unreachable was executed.
 *
 * @throws IllegalStateException always
 */
private void _throwInternal()
{
    final String problem = "Internal error: code path should never get executed";
    throw new IllegalStateException(problem);
}
Method for reporting low-level decoding (parsing) problems
/**
 * Method for reporting low-level decoding (parsing) problems: throws a
 * parse exception, tied to the owning parser, with the given message.
 *
 * @param msg failure description
 *
 * @throws JsonParseException always
 */
protected final void _reportError(String msg) throws JsonParseException
{
    final JsonParseException failure = new JsonParseException(_owner, msg);
    throw failure;
}
}