/*
 * Copyright 2004-2019 H2 Group. Multiple-Licensed under the MPL 2.0,
 * and the EPL 1.0 (http://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */
package org.h2.tools;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import org.h2.api.ErrorCode;
import org.h2.engine.Constants;
import org.h2.engine.SysProperties;
import org.h2.message.DbException;
import org.h2.store.fs.FileUtils;
import org.h2.util.IOUtils;
import org.h2.util.JdbcUtils;
import org.h2.util.StringUtils;
import org.h2.util.Utils;

A facility to read from and write to CSV (comma separated values) files. When reading, the BOM (the byte-order-mark) character 0xfeff at the beginning of the file is ignored.
Author:Thomas Mueller, Sylvain Cuaz
/** * A facility to read from and write to CSV (comma separated values) files. When * reading, the BOM (the byte-order-mark) character 0xfeff at the beginning of * the file is ignored. * * @author Thomas Mueller, Sylvain Cuaz */
public class Csv implements SimpleRowSource { private String[] columnNames; private String characterSet; private char escapeCharacter = '\"'; private char fieldDelimiter = '\"'; private char fieldSeparatorRead = ','; private String fieldSeparatorWrite = ","; private boolean caseSensitiveColumnNames; private boolean preserveWhitespace; private boolean writeColumnHeader = true; private char lineComment; private String lineSeparator = SysProperties.LINE_SEPARATOR; private String nullString = ""; private String fileName; private Reader input; private char[] inputBuffer; private int inputBufferPos; private int inputBufferStart = -1; private int inputBufferEnd; private Writer output; private boolean endOfLine, endOfFile; private int writeResultSet(ResultSet rs) throws SQLException { try { int rows = 0; ResultSetMetaData meta = rs.getMetaData(); int columnCount = meta.getColumnCount(); String[] row = new String[columnCount]; int[] sqlTypes = new int[columnCount]; for (int i = 0; i < columnCount; i++) { row[i] = meta.getColumnLabel(i + 1); sqlTypes[i] = meta.getColumnType(i + 1); } if (writeColumnHeader) { writeRow(row); } while (rs.next()) { for (int i = 0; i < columnCount; i++) { Object o; switch (sqlTypes[i]) { case Types.DATE: o = rs.getDate(i + 1); break; case Types.TIME: o = rs.getTime(i + 1); break; case Types.TIMESTAMP: o = rs.getTimestamp(i + 1); break; default: o = rs.getString(i + 1); } row[i] = o == null ? null : o.toString(); } writeRow(row); rows++; } output.close(); return rows; } catch (IOException e) { throw DbException.convertIOException(e, null); } finally { close(); JdbcUtils.closeSilently(rs); } }
Writes the result set to a file in the CSV format.
Params:
  • writer – the writer
  • rs – the result set
Returns:the number of rows written
/** * Writes the result set to a file in the CSV format. * * @param writer the writer * @param rs the result set * @return the number of rows written */
public int write(Writer writer, ResultSet rs) throws SQLException { this.output = writer; return writeResultSet(rs); }
Writes the result set to a file in the CSV format. The result set is read using the following loop:
while (rs.next()) {
    writeRow(row);
}
Params:
  • outputFileName – the name of the csv file
  • rs – the result set - the result set must be positioned before the first row.
  • charset – the charset or null to use the system default charset
Returns:the number of rows written
/** * Writes the result set to a file in the CSV format. The result set is read * using the following loop: * * <pre> * while (rs.next()) { * writeRow(row); * } * </pre> * * @param outputFileName the name of the csv file * @param rs the result set - the result set must be positioned before the * first row. * @param charset the charset or null to use the system default charset * @return the number of rows written */
public int write(String outputFileName, ResultSet rs, String charset) throws SQLException { init(outputFileName, charset); try { initWrite(); return writeResultSet(rs); } catch (IOException e) { throw convertException("IOException writing " + outputFileName, e); } }
Writes the result set of a query to a file in the CSV format.
Params:
  • conn – the connection
  • outputFileName – the file name
  • sql – the query
  • charset – the charset or null to use the system default charset (see system property file.encoding)
Returns:the number of rows written
/** * Writes the result set of a query to a file in the CSV format. * * @param conn the connection * @param outputFileName the file name * @param sql the query * @param charset the charset or null to use the system default charset * (see system property file.encoding) * @return the number of rows written */
public int write(Connection conn, String outputFileName, String sql, String charset) throws SQLException { Statement stat = conn.createStatement(); ResultSet rs = stat.executeQuery(sql); int rows = write(outputFileName, rs, charset); stat.close(); return rows; }
Reads from the CSV file and returns a result set. The rows in the result set are created on demand, that means the file is kept open until all rows are read or the result set is closed.
If the columns are read from the CSV file, then the following rules are used: columns names that start with a letter or '_', and only contain letters, '_', and digits, are considered case insensitive and are converted to uppercase. Other column names are considered case sensitive (that means they need to be quoted when accessed).
Params:
  • inputFileName – the file name
  • colNames – or null if the column names should be read from the CSV file
  • charset – the charset or null to use the system default charset
Returns:the result set
/** * Reads from the CSV file and returns a result set. The rows in the result * set are created on demand, that means the file is kept open until all * rows are read or the result set is closed. * <br /> * If the columns are read from the CSV file, then the following rules are * used: columns names that start with a letter or '_', and only * contain letters, '_', and digits, are considered case insensitive * and are converted to uppercase. Other column names are considered * case sensitive (that means they need to be quoted when accessed). * * @param inputFileName the file name * @param colNames or null if the column names should be read from the CSV * file * @param charset the charset or null to use the system default charset * @return the result set */
public ResultSet read(String inputFileName, String[] colNames, String charset) throws SQLException { init(inputFileName, charset); try { return readResultSet(colNames); } catch (IOException e) { throw convertException("IOException reading " + inputFileName, e); } }
Reads CSV data from a reader and returns a result set. The rows in the result set are created on demand, that means the reader is kept open until all rows are read or the result set is closed.
Params:
  • reader – the reader
  • colNames – or null if the column names should be read from the CSV file
Returns:the result set
/** * Reads CSV data from a reader and returns a result set. The rows in the * result set are created on demand, that means the reader is kept open * until all rows are read or the result set is closed. * * @param reader the reader * @param colNames or null if the column names should be read from the CSV * file * @return the result set */
public ResultSet read(Reader reader, String[] colNames) throws IOException { init(null, null); this.input = reader; return readResultSet(colNames); } private ResultSet readResultSet(String[] colNames) throws IOException { this.columnNames = colNames; initRead(); SimpleResultSet result = new SimpleResultSet(this); makeColumnNamesUnique(); for (String columnName : columnNames) { result.addColumn(columnName, Types.VARCHAR, Integer.MAX_VALUE, 0); } return result; } private void makeColumnNamesUnique() { for (int i = 0; i < columnNames.length; i++) { StringBuilder buff = new StringBuilder(); String n = columnNames[i]; if (n == null || n.isEmpty()) { buff.append('C').append(i + 1); } else { buff.append(n); } for (int j = 0; j < i; j++) { String y = columnNames[j]; if (buff.toString().equals(y)) { buff.append('1'); j = -1; } } columnNames[i] = buff.toString(); } } private void init(String newFileName, String charset) { this.fileName = newFileName; this.characterSet = charset; } private void initWrite() throws IOException { if (output == null) { try { OutputStream out = FileUtils.newOutputStream(fileName, false); out = new BufferedOutputStream(out, Constants.IO_BUFFER_SIZE); output = new BufferedWriter(characterSet != null ? new OutputStreamWriter(out, characterSet) : new OutputStreamWriter(out)); } catch (Exception e) { close(); throw DbException.convertToIOException(e); } } } private void writeRow(String[] values) throws IOException { for (int i = 0; i < values.length; i++) { if (i > 0) { if (fieldSeparatorWrite != null) { output.write(fieldSeparatorWrite); } } String s = values[i]; if (s != null) { if (escapeCharacter != 0) { if (fieldDelimiter != 0) { output.write(fieldDelimiter); } output.write(escape(s)); if (fieldDelimiter != 0) { output.write(fieldDelimiter); } } else { output.write(s); } } else if (nullString != null && nullString.length() > 0) { output.write(nullString); } } output.write(lineSeparator); } private String escape(String data) { if (data.indexOf(fieldDelimiter) < 0) { if (escapeCharacter == fieldDelimiter || data.indexOf(escapeCharacter) < 0) { return data; } } int length = data.length(); StringBuilder buff = new StringBuilder(length); for (int i = 0; i < length; i++) { char ch = data.charAt(i); if (ch == fieldDelimiter || ch == escapeCharacter) { buff.append(escapeCharacter); } buff.append(ch); } return buff.toString(); } private void initRead() throws IOException { if (input == null) { try { InputStream in = FileUtils.newInputStream(fileName); in = new BufferedInputStream(in, Constants.IO_BUFFER_SIZE); input = characterSet != null ? new InputStreamReader(in, characterSet) : new InputStreamReader(in); } catch (IOException e) { close(); throw e; } } if (!input.markSupported()) { input = new BufferedReader(input); } input.mark(1); int bom = input.read(); if (bom != 0xfeff) { // Microsoft Excel compatibility // ignore pseudo-BOM input.reset(); } inputBuffer = new char[Constants.IO_BUFFER_SIZE * 2]; if (columnNames == null) { readHeader(); } } private void readHeader() throws IOException { ArrayList<String> list = new ArrayList<>(); while (true) { String v = readValue(); if (v == null) { if (endOfLine) { if (endOfFile || !list.isEmpty()) { break; } } else { v = "COLUMN" + list.size(); list.add(v); } } else { if (v.isEmpty()) { v = "COLUMN" + list.size(); } else if (!caseSensitiveColumnNames && isSimpleColumnName(v)) { v = StringUtils.toUpperEnglish(v); } list.add(v); if (endOfLine) { break; } } } columnNames = list.toArray(new String[0]); } private static boolean isSimpleColumnName(String columnName) { for (int i = 0, length = columnName.length(); i < length; i++) { char ch = columnName.charAt(i); if (i == 0) { if (ch != '_' && !Character.isLetter(ch)) { return false; } } else { if (ch != '_' && !Character.isLetterOrDigit(ch)) { return false; } } } return columnName.length() != 0; } private void pushBack() { inputBufferPos--; } private int readChar() throws IOException { if (inputBufferPos >= inputBufferEnd) { return readBuffer(); } return inputBuffer[inputBufferPos++]; } private int readBuffer() throws IOException { if (endOfFile) { return -1; } int keep; if (inputBufferStart >= 0) { keep = inputBufferPos - inputBufferStart; if (keep > 0) { char[] src = inputBuffer; if (keep + Constants.IO_BUFFER_SIZE > src.length) { inputBuffer = new char[src.length * 2]; } System.arraycopy(src, inputBufferStart, inputBuffer, 0, keep); } inputBufferStart = 0; } else { keep = 0; } inputBufferPos = keep; int len = input.read(inputBuffer, keep, Constants.IO_BUFFER_SIZE); if (len == -1) { // ensure bufferPos > bufferEnd // even after pushBack inputBufferEnd = -1024; endOfFile = true; // ensure the right number of characters are read // in case the input buffer is still used inputBufferPos++; return -1; } inputBufferEnd = keep + len; return inputBuffer[inputBufferPos++]; } private String readValue() throws IOException { endOfLine = false; inputBufferStart = inputBufferPos; while (true) { int ch = readChar(); if (ch == fieldDelimiter) { // delimited value boolean containsEscape = false; inputBufferStart = inputBufferPos; int sep; while (true) { ch = readChar(); if (ch == fieldDelimiter) { ch = readChar(); if (ch != fieldDelimiter) { sep = 2; break; } containsEscape = true; } else if (ch == escapeCharacter) { ch = readChar(); if (ch < 0) { sep = 1; break; } containsEscape = true; } else if (ch < 0) { sep = 1; break; } } String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - sep); if (containsEscape) { s = unEscape(s); } inputBufferStart = -1; while (true) { if (ch == fieldSeparatorRead) { break; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; break; } else if (ch == ' ' || ch == '\t') { // ignore } else { pushBack(); break; } ch = readChar(); } return s; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; return null; } else if (ch == fieldSeparatorRead) { // null return null; } else if (ch <= ' ') { // ignore spaces } else if (lineComment != 0 && ch == lineComment) { // comment until end of line inputBufferStart = -1; do { ch = readChar(); } while (ch != '\n' && ch >= 0 && ch != '\r'); endOfLine = true; return null; } else { // un-delimited value while (true) { ch = readChar(); if (ch == fieldSeparatorRead) { break; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; break; } } String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - 1); if (!preserveWhitespace) { s = s.trim(); } inputBufferStart = -1; // check un-delimited value for nullString return readNull(s); } } } private String readNull(String s) { return s.equals(nullString) ? null : s; } private String unEscape(String s) { StringBuilder buff = new StringBuilder(s.length()); int start = 0; char[] chars = null; while (true) { int idx = s.indexOf(escapeCharacter, start); if (idx < 0) { idx = s.indexOf(fieldDelimiter, start); if (idx < 0) { break; } } if (chars == null) { chars = s.toCharArray(); } buff.append(chars, start, idx - start); if (idx == s.length() - 1) { start = s.length(); break; } buff.append(chars[idx + 1]); start = idx + 2; } buff.append(s, start, s.length()); return buff.toString(); }
INTERNAL
/** * INTERNAL */
@Override public Object[] readRow() throws SQLException { if (input == null) { return null; } String[] row = new String[columnNames.length]; try { int i = 0; while (true) { String v = readValue(); if (v == null) { if (endOfLine) { if (i == 0) { if (endOfFile) { return null; } // empty line continue; } break; } } if (i < row.length) { row[i++] = v; } if (endOfLine) { break; } } } catch (IOException e) { throw convertException("IOException reading from " + fileName, e); } return row; } private static SQLException convertException(String message, Exception e) { return DbException.getJdbcSQLException(ErrorCode.IO_EXCEPTION_1, e, message); }
INTERNAL
/** * INTERNAL */
@Override public void close() { IOUtils.closeSilently(input); input = null; IOUtils.closeSilently(output); output = null; }
INTERNAL
/** * INTERNAL */
@Override public void reset() throws SQLException { throw new SQLException("Method is not supported", "CSV"); }
Override the field separator for writing. The default is ",".
Params:
  • fieldSeparatorWrite – the field separator
/** * Override the field separator for writing. The default is ",". * * @param fieldSeparatorWrite the field separator */
public void setFieldSeparatorWrite(String fieldSeparatorWrite) { this.fieldSeparatorWrite = fieldSeparatorWrite; }
Get the current field separator for writing.
Returns:the field separator
/** * Get the current field separator for writing. * * @return the field separator */
public String getFieldSeparatorWrite() { return fieldSeparatorWrite; }
Override the case sensitive column names setting. The default is false. If enabled, the case of all column names is always preserved.
Params:
  • caseSensitiveColumnNames – whether column names are case sensitive
/** * Override the case sensitive column names setting. The default is false. * If enabled, the case of all column names is always preserved. * * @param caseSensitiveColumnNames whether column names are case sensitive */
public void setCaseSensitiveColumnNames(boolean caseSensitiveColumnNames) { this.caseSensitiveColumnNames = caseSensitiveColumnNames; }
Get the current case sensitive column names setting.
Returns:whether column names are case sensitive
/** * Get the current case sensitive column names setting. * * @return whether column names are case sensitive */
public boolean getCaseSensitiveColumnNames() { return caseSensitiveColumnNames; }
Override the field separator for reading. The default is ','.
Params:
  • fieldSeparatorRead – the field separator
/** * Override the field separator for reading. The default is ','. * * @param fieldSeparatorRead the field separator */
public void setFieldSeparatorRead(char fieldSeparatorRead) { this.fieldSeparatorRead = fieldSeparatorRead; }
Get the current field separator for reading.
Returns:the field separator
/** * Get the current field separator for reading. * * @return the field separator */
public char getFieldSeparatorRead() { return fieldSeparatorRead; }
Set the line comment character. The default is character code 0 (line comments are disabled).
Params:
  • lineCommentCharacter – the line comment character
/** * Set the line comment character. The default is character code 0 (line * comments are disabled). * * @param lineCommentCharacter the line comment character */
public void setLineCommentCharacter(char lineCommentCharacter) { this.lineComment = lineCommentCharacter; }
Get the line comment character.
Returns:the line comment character, or 0 if disabled
/** * Get the line comment character. * * @return the line comment character, or 0 if disabled */
public char getLineCommentCharacter() { return lineComment; }
Set the field delimiter. The default is " (a double quote). The value 0 means no field delimiter is used.
Params:
  • fieldDelimiter – the field delimiter
/** * Set the field delimiter. The default is " (a double quote). * The value 0 means no field delimiter is used. * * @param fieldDelimiter the field delimiter */
public void setFieldDelimiter(char fieldDelimiter) { this.fieldDelimiter = fieldDelimiter; }
Get the current field delimiter.
Returns:the field delimiter
/** * Get the current field delimiter. * * @return the field delimiter */
public char getFieldDelimiter() { return fieldDelimiter; }
Set the escape character. The escape character is used to escape the field delimiter. This is needed if the data contains the field delimiter. The default escape character is " (a double quote), which is the same as the field delimiter. If the field delimiter and the escape character are both " (double quote), and the data contains a double quote, then an additional double quote is added. Example:
Data: He said "Hello".
Escape character: "
Field delimiter: "
CSV file: "He said ""Hello""."
If the field delimiter is a double quote and the escape character is a backslash, then escaping is done similar to Java (however, only the field delimiter is escaped). Example:
Data: He said "Hello".
Escape character: \
Field delimiter: "
CSV file: "He said \"Hello\"."
The value 0 means no escape character is used.
Params:
  • escapeCharacter – the escape character
/** * Set the escape character. The escape character is used to escape the * field delimiter. This is needed if the data contains the field delimiter. * The default escape character is " (a double quote), which is the same as * the field delimiter. If the field delimiter and the escape character are * both " (double quote), and the data contains a double quote, then an * additional double quote is added. Example: * <pre> * Data: He said "Hello". * Escape character: " * Field delimiter: " * CSV file: "He said ""Hello""." * </pre> * If the field delimiter is a double quote and the escape character is a * backslash, then escaping is done similar to Java (however, only the field * delimiter is escaped). Example: * <pre> * Data: He said "Hello". * Escape character: \ * Field delimiter: " * CSV file: "He said \"Hello\"." * </pre> * The value 0 means no escape character is used. * * @param escapeCharacter the escape character */
public void setEscapeCharacter(char escapeCharacter) { this.escapeCharacter = escapeCharacter; }
Get the current escape character.
Returns:the escape character
/** * Get the current escape character. * * @return the escape character */
public char getEscapeCharacter() { return escapeCharacter; }
Set the line separator used for writing. This is usually a line feed (\n or \r\n depending on the system settings). The line separator is written after each row (including the last row), so this option can include an end-of-row marker if needed.
Params:
  • lineSeparator – the line separator
/** * Set the line separator used for writing. This is usually a line feed (\n * or \r\n depending on the system settings). The line separator is written * after each row (including the last row), so this option can include an * end-of-row marker if needed. * * @param lineSeparator the line separator */
public void setLineSeparator(String lineSeparator) { this.lineSeparator = lineSeparator; }
Get the line separator used for writing.
Returns:the line separator
/** * Get the line separator used for writing. * * @return the line separator */
public String getLineSeparator() { return lineSeparator; }
Set the value that represents NULL. It is only used for non-delimited values.
Params:
  • nullString – the null
/** * Set the value that represents NULL. It is only used for non-delimited * values. * * @param nullString the null */
public void setNullString(String nullString) { this.nullString = nullString; }
Get the current null string.
Returns:the null string.
/** * Get the current null string. * * @return the null string. */
public String getNullString() { return nullString; }
Enable or disable preserving whitespace in unquoted text.
Params:
  • value – the new value for the setting
/** * Enable or disable preserving whitespace in unquoted text. * * @param value the new value for the setting */
public void setPreserveWhitespace(boolean value) { this.preserveWhitespace = value; }
Whether whitespace in unquoted text is preserved.
Returns:the current value for the setting
/** * Whether whitespace in unquoted text is preserved. * * @return the current value for the setting */
public boolean getPreserveWhitespace() { return preserveWhitespace; }
Enable or disable writing the column header.
Params:
  • value – the new value for the setting
/** * Enable or disable writing the column header. * * @param value the new value for the setting */
public void setWriteColumnHeader(boolean value) { this.writeColumnHeader = value; }
Whether the column header is written.
Returns:the current value for the setting
/** * Whether the column header is written. * * @return the current value for the setting */
public boolean getWriteColumnHeader() { return writeColumnHeader; }
INTERNAL. Parse and set the CSV options.
Params:
  • options – the options
Returns:the character set
/** * INTERNAL. * Parse and set the CSV options. * * @param options the options * @return the character set */
public String setOptions(String options) { String charset = null; String[] keyValuePairs = StringUtils.arraySplit(options, ' ', false); for (String pair : keyValuePairs) { if (pair.isEmpty()) { continue; } int index = pair.indexOf('='); String key = StringUtils.trim(pair.substring(0, index), true, true, " "); String value = pair.substring(index + 1); char ch = value.isEmpty() ? 0 : value.charAt(0); if (isParam(key, "escape", "esc", "escapeCharacter")) { setEscapeCharacter(ch); } else if (isParam(key, "fieldDelimiter", "fieldDelim")) { setFieldDelimiter(ch); } else if (isParam(key, "fieldSeparator", "fieldSep")) { setFieldSeparatorRead(ch); setFieldSeparatorWrite(value); } else if (isParam(key, "lineComment", "lineCommentCharacter")) { setLineCommentCharacter(ch); } else if (isParam(key, "lineSeparator", "lineSep")) { setLineSeparator(value); } else if (isParam(key, "null", "nullString")) { setNullString(value); } else if (isParam(key, "charset", "characterSet")) { charset = value; } else if (isParam(key, "preserveWhitespace")) { setPreserveWhitespace(Utils.parseBoolean(value, false, false)); } else if (isParam(key, "writeColumnHeader")) { setWriteColumnHeader(Utils.parseBoolean(value, true, false)); } else if (isParam(key, "caseSensitiveColumnNames")) { setCaseSensitiveColumnNames(Utils.parseBoolean(value, false, false)); } else { throw DbException.getUnsupportedException(key); } } return charset; } private static boolean isParam(String key, String... values) { for (String v : values) { if (key.equalsIgnoreCase(v)) { return true; } } return false; } }