/*
 * For work developed by the HSQL Development Group:
 *
 * Copyright (c) 2001-2019, The HSQL Development Group
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the HSQL Development Group nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *
 * For work originally developed by the Hypersonic SQL Group:
 *
 * Copyright (c) 1995-2000, The Hypersonic SQL Group.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the Hypersonic SQL Group nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This software consists of voluntary contributions made by many individuals
 * on behalf of the Hypersonic SQL Group.
 */


package org.hsqldb.lib;

import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;

import org.hsqldb.map.BitMap;

Collection of static methods for converting strings between different formats and to and from byte arrays.

Includes two methods based on Hypersonic code as indicated.

Author:Thomas Mueller (Hypersonic SQL Group), Fred Toussi (fredt@users dot sourceforge.net)
Version:2.5.0
Since:1.7.2
/**
 * Collection of static methods for converting strings between different
 * formats and to and from byte arrays.<p>
 *
 * Includes two methods based on Hypersonic code as indicated.
 *
 * @author Thomas Mueller (Hypersonic SQL Group)
 * @author Fred Toussi (fredt@users dot sourceforge.net)
 * @version 2.5.0
 * @since 1.7.2
 */
public class StringConverter {

    /** Lower-case ASCII hexadecimal digits, indexed by nibble value (0-15). */
    private static final byte[] HEXBYTES = {
        (byte) '0', (byte) '1', (byte) '2', (byte) '3',
        (byte) '4', (byte) '5', (byte) '6', (byte) '7',
        (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
        (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'
    };

    /**
     * Returns the numeric value of a hexadecimal digit.
     *
     * @param value character code of the digit; both upper-case and
     *              lower-case letters are accepted
     * @return a value between 0 and 15, or -1 when value is not one of
     *         0-9, a-f or A-F
     */
    private static int getNibble(int value) {

        if (value >= '0' && value <= '9') {
            return value - '0';
        }

        final int base;

        if (value >= 'a' && value <= 'f') {
            base = 'a';
        } else if (value >= 'A' && value <= 'F') {
            base = 'A';
        } else {
            return -1;
        }

        return 10 + value - base;
    }
Converts a hexadecimal string into a byte array
Params:
  • s – hexadecimal string
Throws:
Returns:byte array for the hex string
/** * Converts a hexadecimal string into a byte array * * * @param s hexadecimal string * * @return byte array for the hex string * @throws IOException */
public static byte[] hexStringToByteArray(String s) throws IOException { int l = s.length(); byte[] data = new byte[l / 2 + (l % 2)]; int n, b = 0; boolean high = true; int i = 0; for (int j = 0; j < l; j++) { char c = s.charAt(j); if (c == ' ') { continue; } n = getNibble(c); if (n == -1) { throw new IOException( "hexadecimal string contains non hex character"); //NOI18N } if (high) { b = (n & 0xf) << 4; high = false; } else { b += (n & 0xf); high = true; data[i++] = (byte) b; } } if (!high) { throw new IOException( "hexadecimal string with odd number of characters"); //NOI18N } if (i < data.length) { data = (byte[]) ArrayUtil.resizeArray(data, i); } return data; }
Compacts a bit string into a BitMap
Params:
  • s – bit string
Throws:
Returns:BitMap for the bit string
/** * Compacts a bit string into a BitMap * @param s bit string * @return BitMap for the bit string * @throws IOException */
public static BitMap sqlBitStringToBitMap(String s) throws IOException { int l = s.length(); int n; int bitIndex = 0; BitMap map = new BitMap(0, true); for (int j = 0; j < l; j++) { char c = s.charAt(j); if (c == ' ') { continue; } n = getNibble(c); if (n != 0 && n != 1) { throw new IOException( "hexadecimal string contains non hex character"); //NOI18N } if (n == 1) { map.set(bitIndex); } bitIndex++; } map.setSize(bitIndex); return map; }
Converts a byte array into a hexadecimal string
Params:
  • b – byte array
Returns:hex string
/**
 * Converts a byte array into a hexadecimal string. Each byte becomes two
 * lower-case hexadecimal digits, high nibble first.
 *
 * @param b byte array
 *
 * @return hex string
 */
public static String byteArrayToHexString(byte[] b) {

    char[] hex = new char[b.length * 2];
    int out = 0;

    for (byte value : b) {
        int unsigned = value & 0xff;

        hex[out++] = (char) HEXBYTES[unsigned >>> 4];
        hex[out++] = (char) HEXBYTES[unsigned & 0xf];
    }

    return new String(hex);
}
Converts a byte array into an SQL hexadecimal string
Params:
  • b – byte array
Returns:hex string
/**
 * Converts a byte array into an SQL hexadecimal string of the form
 * X'...', with two lower-case hexadecimal digits per byte.
 *
 * @param b byte array
 *
 * @return hex string
 */
public static String byteArrayToSQLHexString(byte[] b) {

    StringBuilder sql = new StringBuilder(b.length * 2 + 3);

    sql.append('X').append('\'');

    for (byte value : b) {
        int unsigned = value & 0xff;

        sql.append((char) HEXBYTES[unsigned >>> 4]);
        sql.append((char) HEXBYTES[unsigned & 0xf]);
    }

    sql.append('\'');

    return sql.toString();
}
Converts a byte array into a bit string
Params:
  • bytes – byte array
  • bitCount – number of bits
Returns:bit string
/**
 * Converts a byte array into a bit string. Character j of the result is
 * '1' when BitMap.isSet reports position j % 8 of byte j / 8 as set,
 * otherwise '0'.
 *
 * @param bytes byte array
 * @param bitCount number of bits
 * @return bit string of bitCount characters
 */
public static String byteArrayToBitString(byte[] bytes, int bitCount) {

    char[] bits = new char[bitCount];

    for (int bit = 0; bit < bitCount; bit++) {
        byte source = bytes[bit / 8];

        if (BitMap.isSet(source, bit % 8)) {
            bits[bit] = '1';
        } else {
            bits[bit] = '0';
        }
    }

    return new String(bits);
}
Converts a byte array into an SQL binary string
Params:
  • bytes – byte array
  • bitCount – number of bits
Returns:SQL bit string
/**
 * Converts a byte array into an SQL binary string of the form B'...'.
 * The digit for bit j is '1' when BitMap.isSet reports position j % 8 of
 * byte j / 8 as set, otherwise '0'.
 *
 * @param bytes byte array
 * @param bitCount number of bits
 * @return SQL bit string
 */
public static String byteArrayToSQLBitString(byte[] bytes, int bitCount) {

    // prefix B' (2 chars) + one char per bit + closing quote
    char[] sql = new char[bitCount + 3];

    sql[0] = 'B';
    sql[1] = '\'';

    for (int bit = 0; bit < bitCount; bit++) {
        byte source = bytes[bit / 8];

        if (BitMap.isSet(source, bit % 8)) {
            sql[2 + bit] = '1';
        } else {
            sql[2 + bit] = '0';
        }
    }

    sql[2 + Math.max(bitCount, 0)] = '\'';

    return new String(sql);
}
Converts a byte array into hexadecimal characters which are written as ASCII to the given output stream.
Params:
  • o – output array
  • from – offset into output array
  • b – input array
Returns:written count
/**
 * Converts a byte array into hexadecimal characters which are written as
 * ASCII bytes into the given output array, two digits per input byte.
 *
 * @param o output array
 * @param from offset into output array
 * @param b input array
 * @return written count
 */
public static int writeHexBytes(byte[] o, final int from, byte[] b) {

    int target = from;

    for (byte value : b) {
        int unsigned = value & 0xff;

        o[target++] = HEXBYTES[unsigned >>> 4];
        o[target++] = HEXBYTES[unsigned & 0xf];
    }

    return target - from;
}

/**
 * Decodes a byte array into a string.
 *
 * @param b bytes to decode
 * @param charset name of the charset to use, or null to use the platform
 *                default charset
 * @return the decoded string, or null if decoding threw any exception
 */
public static String byteArrayToString(byte[] b, String charset) {

    try {
        if (charset == null) {
            return new String(b);
        }

        return new String(b, charset);
    } catch (Exception e) {

        // best effort: any failure is reported to the caller as null
        return null;
    }
}
Hsqldb specific encoding used only for log files. The SQL statements that need to be written to the log file (input) are Java Unicode strings. The input is converted into a 7-bit escaped ASCII string (output) with the following transformations. All characters outside the 0x20-7f range are converted to an escape sequence and added to output. If a backslash character is immediately followed by 'u', the backslash character is converted to an escape sequence and added to output. All the remaining characters in input are added to output without conversion. The escape sequence is backslash, letter u, xxxx, where xxxx is the hex representation of the character code. (fredt@users)

Method based on Hypersonic Code

Params:
  • b – output stream to write to
  • s – Java string
  • doubleSingleQuotes – boolean
/** * Hsqldb specific encoding used only for log files. The SQL statements that * need to be written to the log file (input) are Java Unicode strings. * input is converted into a 7bit escaped ASCII string (output)with the * following transformations. All characters outside the 0x20-7f range are * converted to a escape sequence and added to output. If a backslash * character is immediately followed by 'u', the backslash character is * converted to escape sequence and added to output. All the remaining * characters in input are added to output without conversion. The escape * sequence is backslash, letter u, xxxx, where xxxx is the hex * representation of the character code. (fredt@users)<p> * * Method based on Hypersonic Code * * @param b output stream to wite to * @param s Java string * @param doubleSingleQuotes boolean */
public static void stringToUnicodeBytes(HsqlByteArrayOutputStream b, String s, boolean doubleSingleQuotes) { if (s == null) { return; } final int len = s.length(); int extras = 0; if (len == 0) { return; } b.ensureRoom(len * 2 + 5); for (int i = 0; i < len; i++) { char c = s.charAt(i); if (c == '\\') { if ((i < len - 1) && (s.charAt(i + 1) == 'u')) { b.writeNoCheck(c); // encode the \ as unicode, so 'u' is ignored b.writeNoCheck('u'); b.writeNoCheck('0'); b.writeNoCheck('0'); b.writeNoCheck('5'); b.writeNoCheck('c'); extras += 5; } else { b.write(c); } } else if ((c >= 0x0020) && (c <= 0x007f)) { b.writeNoCheck(c); // this is 99% if (c == '\'' && doubleSingleQuotes) { b.writeNoCheck(c); extras++; } } else { b.writeNoCheck('\\'); b.writeNoCheck('u'); b.writeNoCheck(HEXBYTES[(c >> 12) & 0xf]); b.writeNoCheck(HEXBYTES[(c >> 8) & 0xf]); b.writeNoCheck(HEXBYTES[(c >> 4) & 0xf]); b.writeNoCheck(HEXBYTES[c & 0xf]); extras += 5; } if (extras > len) { b.ensureRoom(len + extras + 5); extras = 0; } } } // fredt@users 20020522 - fix for 557510 - backslash bug // this legacy bug resulted from forward reading the input when a backslash // was present and manifested itself when a backslash was followed // immediately by a character outside the 0x20-7f range in a database field.
Hsqldb specific decoding used only for log files. This method converts the 7 bit escaped ASCII strings in a log file back into Java Unicode strings. See stringToUnicodeBytes() above.

Method based on Hypersonic Code

Params:
  • s – encoded ASCII string in byte array
Returns:Java string
/** * Hsqldb specific decoding used only for log files. This method converts * the 7 bit escaped ASCII strings in a log file back into Java Unicode * strings. See stringToUnicodeBytes() above. <p> * * Method based on Hypersonic Code * * @param s encoded ASCII string in byte array * @return Java string */
public static String unicodeStringToString(String s) { if (s == null || !s.contains("\\u")) { return s; } int len = s.length(); char[] b = new char[len]; int j = 0; for (int i = 0; i < len; i++) { char c = s.charAt(i); if (c == '\\' && i < len - 5) { char c1 = s.charAt(i + 1); if (c1 == 'u') { i++; // 4 characters read should always return 0-15 int k = getNibble(s.charAt(++i)) << 12; k += getNibble(s.charAt(++i)) << 8; k += getNibble(s.charAt(++i)) << 4; k += getNibble(s.charAt(++i)); b[j++] = (char) k; } else { b[j++] = c; } } else { b[j++] = c; } } return new String(b, 0, j); } public static String readUTF(byte[] bytearr, int offset, int length) throws IOException { char[] buf = new char[length]; return readUTF(bytearr, offset, length, buf); } public static String readUTF(byte[] bytearr, int offset, int length, char[] buf) throws IOException { int bcount = 0; int c, char2, char3; int count = 0; while (count < length) { c = (int) bytearr[offset + count]; if (bcount == buf.length) { buf = (char[]) ArrayUtil.resizeArray(buf, length); } if (c > 0) { /* 0xxxxxxx*/ count++; buf[bcount++] = (char) c; continue; } c &= 0xff; switch (c >> 4) { case 12 : case 13 : /* 110x xxxx 10xx xxxx*/ count += 2; if (count > length) { throw new UTFDataFormatException(); } char2 = (int) bytearr[offset + count - 1]; if ((char2 & 0xC0) != 0x80) { throw new UTFDataFormatException(); } buf[bcount++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14 : /* 1110 xxxx 10xx xxxx 10xx xxxx */ count += 3; if (count > length) { throw new UTFDataFormatException(); } char2 = (int) bytearr[offset + count - 2]; char3 = (int) bytearr[offset + count - 1]; if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { throw new UTFDataFormatException(); } buf[bcount++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F))); break; default : /* 10xx xxxx, 1111 xxxx */ throw new UTFDataFormatException(); } } // The number of chars produced may be less than length return new 
String(buf, 0, bcount); }
Writes a string to the specified DataOutput using UTF-8 encoding in a machine-independent manner.

Params:
  • str – a string to be written.
  • out – destination to write to
Returns: The number of bytes written out.
/**
 * Writes a string to the specified output stream using a UTF-8 encoding
 * of one to three bytes per char, in a machine-independent manner. Chars
 * 0x0001-0x007F take one byte, chars above 0x07FF take three bytes and
 * all others (including 0x0000) take two bytes.
 * <p>
 * @param str a string to be written.
 * @param out destination to write to
 * @return The number of bytes written out.
 */
public static int stringToUTFBytes(String str,
                                   HsqlByteArrayOutputStream out) {

    final int strlen = str.length();

    if (out.count + strlen + 8 > out.buffer.length) {
        out.ensureRoom(strlen + 8);
    }

    final char[] chars = str.toCharArray();
    int written = 0;

    for (int i = 0; i < strlen; i++) {
        final int c = chars[i];

        if (c > 0x07FF) {
            out.buffer[out.count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
            out.buffer[out.count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
            out.buffer[out.count++] = (byte) (0x80 | ((c) & 0x3F));
            written += 3;
        } else if (c >= 0x0001 && c <= 0x007F) {
            out.buffer[out.count++] = (byte) c;
            written++;
        } else {
            out.buffer[out.count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
            out.buffer[out.count++] = (byte) (0x80 | ((c) & 0x3F));
            written += 2;
        }

        // keep at least 8 spare bytes ahead of the next iteration
        if (out.count + 8 > out.buffer.length) {
            out.ensureRoom(strlen - i + 8);
        }
    }

    return written;
}

/**
 * Returns the number of bytes that stringToUTFBytes would write for the
 * given string.
 *
 * @param s a string, may be null
 * @return encoded size in bytes; 0 for a null string
 */
public static int getUTFSize(String s) {

    if (s == null) {
        return 0;
    }

    int size = 0;

    for (int i = 0, n = s.length(); i < n; i++) {
        final int c = s.charAt(i);

        if (c > 0x07FF) {
            size += 3;
        } else if (c >= 0x0001 && c <= 0x007F) {
            size += 1;
        } else {
            size += 2;
        }
    }

    return size;
}
Using an output stream, returns a String from an InputStream.
Params:
  • is – InputStream to read from
  • encoding – character encoding of the string
Throws:
Returns:a Java string
/**
 * Reads an InputStream to its end, one byte at a time, and decodes the
 * collected bytes into a String. The stream is not closed.
 *
 * @param is InputStream to read from
 * @param encoding character encoding of the string
 * @throws IOException if reading fails or the encoding is not supported
 * @return a Java string
 */
public static String inputStreamToString(InputStream is,
        String encoding) throws IOException {

    HsqlByteArrayOutputStream collected =
        new HsqlByteArrayOutputStream(1024);

    for (int next = is.read(); next != -1; next = is.read()) {
        collected.write(next);
    }

    return new String(collected.getBuffer(), 0, collected.size(),
                      encoding);
}

// fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size
Returns the quoted version of the string using the quotechar argument. doublequote argument indicates whether each instance of quotechar inside the string is doubled.

A null string argument returns null. If the caller needs the literal "NULL" it should create it itself.

Params:
  • s – Java string
  • quoteChar – character used for quoting
  • extraQuote – true if quoteChar itself should be repeated
Returns:String
/**
 * Returns the string wrapped in the given quote character. When
 * extraQuote is true, each occurrence of the quote character inside the
 * string is doubled.<p>
 *
 * A null string argument returns null. If the caller needs the literal
 * "NULL" it should create it itself.<p>
 *
 * @param s Java string
 * @param quoteChar character used for quoting
 * @param extraQuote true if quoteChar itself should be repeated
 * @return quoted String, or null when s is null
 */
public static String toQuotedString(String s, char quoteChar,
                                    boolean extraQuote) {

    if (s == null) {
        return null;
    }

    final int len = s.length();
    final int doubled = extraQuote ? count(s, quoteChar)
                                   : 0;

    // two enclosing quotes + original chars + one extra per doubled quote
    StringBuilder quoted = new StringBuilder(2 + doubled + len);

    quoted.append(quoteChar);

    for (int i = 0; i < len; i++) {
        final char c = s.charAt(i);

        quoted.append(c);

        if (extraQuote && c == quoteChar) {
            quoted.append(c);
        }
    }

    quoted.append(quoteChar);

    return quoted.toString();
}
Counts Character c in String s
Params:
  • s – Java string
  • c – character to count
Returns:int count
/**
 * Counts the occurrences of a character in a string.
 *
 * @param s Java string, may be null
 * @param c character to count
 * @return number of occurrences of c in s; 0 when s is null
 */
static int count(final String s, final char c) {

    if (s == null) {
        return 0;
    }

    int total = 0;

    for (int i = 0, n = s.length(); i < n; i++) {
        if (s.charAt(i) == c) {
            total++;
        }
    }

    return total;
}
Converts the string to an HTML representation in the ASCII character set and appends it to a byte array output stream.
Params:
  • b – the output byte array output stream
  • s – the input string
/**
 * Converts the string to an HTML representation in the ASCII character set
 * and appends it to a byte array output stream. Characters above 0x7f and
 * the characters ", &amp;, &lt; and &gt; are written as decimal numeric
 * character references of their code point (a surrogate pair becomes a
 * single reference). Characters below 0x20 are written as a space. All
 * other characters are written unchanged.
 *
 * @param b the output byte array output stream
 * @param s the input string; null or empty writes nothing
 */
public static void stringToHtmlBytes(HsqlByteArrayOutputStream b,
                                     String s) {

    if (s == null) {
        return;
    }

    final int len = s.length();

    if (len == 0) {
        return;
    }

    final char[] chars = s.toCharArray();

    // one byte per char; sufficient only while no reference is written
    b.ensureRoom(len);

    for (int i = 0; i < len; i++) {
        char c = chars[i];

        if (c > 0x007f || c == '"' || c == '&' || c == '<' || c == '>') {
            int codePoint = Character.codePointAt(chars, i);

            if (Character.charCount(codePoint) == 2) {
                i++;
            }

            // A reference takes up to 10 bytes for one or two input chars,
            // which eats into the one-byte-per-char reservation made above.
            // Reserve again for this reference plus every remaining char so
            // that the unchecked writes below cannot run past the buffer.
            b.ensureRoom(16 + (len - i - 1));
            b.writeNoCheck('&');
            b.writeNoCheck('#');
            b.writeBytes(String.valueOf(codePoint));
            b.writeNoCheck(';');
        } else if (c < 0x0020) {
            b.writeNoCheck(' ');
        } else {
            b.writeNoCheck(c);
        }
    }
}
Returns a string representation in UUID form from a binary string. UUID string is composed of 8-4-4-4-12 hexadecimal characters.
Params:
  • b – the byte array
Returns:UUID string form
/**
 * Returns a string representation in UUID form from a binary string.
 *
 * UUID string is composed of 8-4-4-4-12 lower-case hexadecimal characters
 * separated by hyphens.
 *
 * @param b the byte array, which must hold 16 bytes
 * @return UUID string form, or null when b is null
 * @throws NumberFormatException if b does not hold exactly 16 bytes
 */
public static String toStringUUID(byte[] b) {

    if (b == null) {
        return null;
    }

    if (b.length != 16) {
        throw new NumberFormatException();
    }

    char[] text = new char[36];
    int out = 0;

    for (int i = 0; i < b.length; i++) {

        // a hyphen precedes the bytes at index 4, 6, 8 and 10
        if (i == 4 || i == 6 || i == 8 || i == 10) {
            text[out++] = '-';
        }

        final int value = b[i] & 0xff;

        text[out++] = (char) HEXBYTES[value >> 4];
        text[out++] = (char) HEXBYTES[value & 0xf];
    }

    return new String(text);
}

/**
 * Writes the 8-4-4-4-12 UUID form of a 16 byte array as ASCII bytes into
 * the given output array.
 *
 * @param o output array
 * @param from offset into output array
 * @param b the byte array, which must hold 16 bytes
 * @return written count
 * @throws NumberFormatException if b does not hold exactly 16 bytes
 */
public static int writeUUIDHexBytes(byte[] o, final int from, byte[] b) {

    if (b.length != 16) {
        throw new NumberFormatException();
    }

    int target = from;

    for (int i = 0; i < b.length; i++) {

        // a hyphen precedes the bytes at index 4, 6, 8 and 10
        if (i == 4 || i == 6 || i == 8 || i == 10) {
            o[target++] = '-';
        }

        final int value = b[i] & 0xff;

        o[target++] = HEXBYTES[value >> 4];
        o[target++] = HEXBYTES[value & 0xf];
    }

    return target - from;
}
Returns a byte[] representation in UUID form from a UUID string.
Params:
  • s – the UUID string
Returns:byte array
/**
 * Returns a byte[] representation in UUID form from a UUID string. The
 * string must be 36 characters long, in the 8-4-4-4-12 form: hexadecimal
 * digits (either case) separated by hyphens.
 *
 * @param s the UUID string
 * @return byte array of 16 bytes, or null when s is null
 * @throws NumberFormatException if the string is not 36 characters long,
 *         a hyphen is missing from its expected position, or a character
 *         in a digit position is not a hexadecimal digit
 */
public static byte[] toBinaryUUID(String s) {

    if (s == null) {
        return null;
    }

    if (s.length() != 36) {
        throw new NumberFormatException();
    }

    byte[] bytes = new byte[16];

    for (int i = 0, j = 0; i < bytes.length; ) {
        int high = getNibble(s.charAt(j++));
        int low  = getNibble(s.charAt(j++));

        // getNibble reports -1 for a non-hex character; reject it instead
        // of silently folding it into a garbage byte value
        if (high == -1 || low == -1) {
            throw new NumberFormatException();
        }

        bytes[i] = (byte) ((high << 4) + low);

        i++;

        // hyphens follow the 4th, 6th, 8th and 10th byte
        if (i >= 4 && i <= 10 && (i % 2) == 0) {
            if (s.charAt(j++) != '-') {
                throw new NumberFormatException();
            }
        }
    }

    return bytes;
}
}