/*
 * Copyright (c) 2003, PostgreSQL Global Development Group
 * See the LICENSE file in the project root for more information.
 */

package org.postgresql.core;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

Representation of a particular character encoding.
/** * Representation of a particular character encoding. */
public class Encoding { private static final Logger LOGGER = Logger.getLogger(Encoding.class.getName()); private static final Encoding DEFAULT_ENCODING = new Encoding(); private static final Encoding UTF8_ENCODING = new Encoding("UTF-8"); /* * Preferred JVM encodings for backend encodings. */ private static final HashMap<String, String[]> encodings = new HashMap<String, String[]>(); static { //Note: this list should match the set of supported server // encodings found in backend/util/mb/encnames.c encodings.put("SQL_ASCII", new String[]{"ASCII", "US-ASCII"}); encodings.put("UNICODE", new String[]{"UTF-8", "UTF8"}); encodings.put("UTF8", new String[]{"UTF-8", "UTF8"}); encodings.put("LATIN1", new String[]{"ISO8859_1"}); encodings.put("LATIN2", new String[]{"ISO8859_2"}); encodings.put("LATIN3", new String[]{"ISO8859_3"}); encodings.put("LATIN4", new String[]{"ISO8859_4"}); encodings.put("ISO_8859_5", new String[]{"ISO8859_5"}); encodings.put("ISO_8859_6", new String[]{"ISO8859_6"}); encodings.put("ISO_8859_7", new String[]{"ISO8859_7"}); encodings.put("ISO_8859_8", new String[]{"ISO8859_8"}); encodings.put("LATIN5", new String[]{"ISO8859_9"}); encodings.put("LATIN7", new String[]{"ISO8859_13"}); encodings.put("LATIN9", new String[]{"ISO8859_15_FDIS"}); encodings.put("EUC_JP", new String[]{"EUC_JP"}); encodings.put("EUC_CN", new String[]{"EUC_CN"}); encodings.put("EUC_KR", new String[]{"EUC_KR"}); encodings.put("JOHAB", new String[]{"Johab"}); encodings.put("EUC_TW", new String[]{"EUC_TW"}); encodings.put("SJIS", new String[]{"MS932", "SJIS"}); encodings.put("BIG5", new String[]{"Big5", "MS950", "Cp950"}); encodings.put("GBK", new String[]{"GBK", "MS936"}); encodings.put("UHC", new String[]{"MS949", "Cp949", "Cp949C"}); encodings.put("TCVN", new String[]{"Cp1258"}); encodings.put("WIN1256", new String[]{"Cp1256"}); encodings.put("WIN1250", new String[]{"Cp1250"}); encodings.put("WIN874", new String[]{"MS874", "Cp874"}); encodings.put("WIN", new String[]{"Cp1251"}); encodings.put("ALT", new String[]{"Cp866"}); // We prefer KOI8-U, since it is a superset of KOI8-R. encodings.put("KOI8", new String[]{"KOI8_U", "KOI8_R"}); // If the database isn't encoding-aware then we can't have // any preferred encodings. encodings.put("UNKNOWN", new String[0]); // The following encodings do not have a java equivalent encodings.put("MULE_INTERNAL", new String[0]); encodings.put("LATIN6", new String[0]); encodings.put("LATIN8", new String[0]); encodings.put("LATIN10", new String[0]); } private final String encoding; private final boolean fastASCIINumbers;
Uses the default charset of the JVM.
/** * Uses the default charset of the JVM. */
private Encoding() { this(Charset.defaultCharset().name()); }
Subclasses may use this constructor if they know in advance of their ASCII number compatibility.
Params:
  • encoding – charset name to use
  • fastASCIINumbers – whether this encoding is compatible with ASCII numbers.
/** * Subclasses may use this constructor if they know in advance of their ASCII number * compatibility. * * @param encoding charset name to use * @param fastASCIINumbers whether this encoding is compatible with ASCII numbers. */
protected Encoding(String encoding, boolean fastASCIINumbers) { if (encoding == null) { throw new NullPointerException("Null encoding charset not supported"); } this.encoding = encoding; this.fastASCIINumbers = fastASCIINumbers; if (LOGGER.isLoggable(Level.FINEST)) { LOGGER.log(Level.FINEST, "Creating new Encoding {0} with fastASCIINumbers {1}", new Object[]{encoding, fastASCIINumbers}); } }
Use the charset passed as parameter and tests at creation time whether the specified encoding is compatible with ASCII numbers.
Params:
  • encoding – charset name to use
/** * Use the charset passed as parameter and tests at creation time whether the specified encoding * is compatible with ASCII numbers. * * @param encoding charset name to use */
protected Encoding(String encoding) { this(encoding, testAsciiNumbers(encoding)); }
Returns true if this encoding has characters '-' and '0'..'9' in exactly same posision as ascii.
Returns:true if the bytes can be scanned directly for ascii numbers.
/** * Returns true if this encoding has characters '-' and '0'..'9' in exactly same posision as * ascii. * * @return true if the bytes can be scanned directly for ascii numbers. */
public boolean hasAsciiNumbers() { return fastASCIINumbers; }
Construct an Encoding for a given JVM encoding.
Params:
  • jvmEncoding – the name of the JVM encoding
Returns:an Encoding instance for the specified encoding, or an Encoding instance for the default JVM encoding if the specified encoding is unavailable.
/** * Construct an Encoding for a given JVM encoding. * * @param jvmEncoding the name of the JVM encoding * @return an Encoding instance for the specified encoding, or an Encoding instance for the * default JVM encoding if the specified encoding is unavailable. */
public static Encoding getJVMEncoding(String jvmEncoding) { if ("UTF-8".equals(jvmEncoding)) { return new UTF8Encoding(); } if (Charset.isSupported(jvmEncoding)) { return new Encoding(jvmEncoding); } else { return DEFAULT_ENCODING; } }
Construct an Encoding for a given database encoding.
Params:
  • databaseEncoding – the name of the database encoding
Returns:an Encoding instance for the specified encoding, or an Encoding instance for the default JVM encoding if the specified encoding is unavailable.
/** * Construct an Encoding for a given database encoding. * * @param databaseEncoding the name of the database encoding * @return an Encoding instance for the specified encoding, or an Encoding instance for the * default JVM encoding if the specified encoding is unavailable. */
public static Encoding getDatabaseEncoding(String databaseEncoding) { if ("UTF8".equals(databaseEncoding)) { return UTF8_ENCODING; } // If the backend encoding is known and there is a suitable // encoding in the JVM we use that. Otherwise we fall back // to the default encoding of the JVM. String[] candidates = encodings.get(databaseEncoding); if (candidates != null) { for (String candidate : candidates) { LOGGER.log(Level.FINEST, "Search encoding candidate {0}", candidate); if (Charset.isSupported(candidate)) { return new Encoding(candidate); } } } // Try the encoding name directly -- maybe the charset has been // provided by the user. if (Charset.isSupported(databaseEncoding)) { return new Encoding(databaseEncoding); } // Fall back to default JVM encoding. LOGGER.log(Level.FINEST, "{0} encoding not found, returning default encoding", databaseEncoding); return DEFAULT_ENCODING; }
Get the name of the (JVM) encoding used.
Returns:the JVM encoding name used by this instance.
/** * Get the name of the (JVM) encoding used. * * @return the JVM encoding name used by this instance. */
public String name() { return Charset.isSupported(encoding) ? Charset.forName(encoding).name() : encoding; }
Encode a string to an array of bytes.
Params:
  • s – the string to encode
Throws:
Returns:a bytearray containing the encoded string
/** * Encode a string to an array of bytes. * * @param s the string to encode * @return a bytearray containing the encoded string * @throws IOException if something goes wrong */
public byte[] encode(String s) throws IOException { if (s == null) { return null; } return s.getBytes(encoding); }
Decode an array of bytes into a string.
Params:
  • encodedString – a byte array containing the string to decode
  • offset – the offset in encodedString of the first byte of the encoded representation
  • length – the length, in bytes, of the encoded representation
Throws:
Returns:the decoded string
/** * Decode an array of bytes into a string. * * @param encodedString a byte array containing the string to decode * @param offset the offset in <code>encodedString</code> of the first byte of the encoded * representation * @param length the length, in bytes, of the encoded representation * @return the decoded string * @throws IOException if something goes wrong */
public String decode(byte[] encodedString, int offset, int length) throws IOException { return new String(encodedString, offset, length, encoding); }
Decode an array of bytes into a string.
Params:
  • encodedString – a byte array containing the string to decode
Throws:
Returns:the decoded string
/** * Decode an array of bytes into a string. * * @param encodedString a byte array containing the string to decode * @return the decoded string * @throws IOException if something goes wrong */
public String decode(byte[] encodedString) throws IOException { return decode(encodedString, 0, encodedString.length); }
Get a Reader that decodes the given InputStream using this encoding.
Params:
  • in – the underlying stream to decode from
Throws:
Returns:a non-null Reader implementation.
/** * Get a Reader that decodes the given InputStream using this encoding. * * @param in the underlying stream to decode from * @return a non-null Reader implementation. * @throws IOException if something goes wrong */
public Reader getDecodingReader(InputStream in) throws IOException { return new InputStreamReader(in, encoding); }
Get a Writer that encodes to the given OutputStream using this encoding.
Params:
  • out – the underlying stream to encode to
Throws:
Returns:a non-null Writer implementation.
/** * Get a Writer that encodes to the given OutputStream using this encoding. * * @param out the underlying stream to encode to * @return a non-null Writer implementation. * @throws IOException if something goes wrong */
public Writer getEncodingWriter(OutputStream out) throws IOException { return new OutputStreamWriter(out, encoding); }
Get an Encoding using the default encoding for the JVM.
Returns:an Encoding instance
/** * Get an Encoding using the default encoding for the JVM. * * @return an Encoding instance */
public static Encoding defaultEncoding() { return DEFAULT_ENCODING; } public String toString() { return encoding; }
Checks whether this encoding is compatible with ASCII for the number characters '-' and '0'..'9'. Where compatible means that they are encoded with exactly same values.
Returns:If faster ASCII number parsing can be used with this encoding.
/** * Checks whether this encoding is compatible with ASCII for the number characters '-' and * '0'..'9'. Where compatible means that they are encoded with exactly same values. * * @return If faster ASCII number parsing can be used with this encoding. */
private static boolean testAsciiNumbers(String encoding) { // TODO: test all postgres supported encoding to see if there are // any which do _not_ have ascii numbers in same location // at least all the encoding listed in the encodings hashmap have // working ascii numbers try { String test = "-0123456789"; byte[] bytes = test.getBytes(encoding); String res = new String(bytes, "US-ASCII"); return test.equals(res); } catch (java.io.UnsupportedEncodingException e) { return false; } } }