/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;
/*
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in Standard charsets.
 * This class is immutable and thread-safe.
 * See Also: CharEncoding, Standard charsets
 * Version: $Id$
 * Since: 1.4
 */
/**
* Converts String to and from bytes using the encodings required by the Java specification. These encodings are
* specified in <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">
* Standard charsets</a>.
*
* <p>This class is immutable and thread-safe.</p>
*
* @see CharEncoding
* @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
* @version $Id$
* @since 1.4
*/
public class StringUtils {
/*
 * Compares two CharSequences, returning true if they represent equal sequences of characters.
 * nulls are handled without exceptions. Two null references are considered to be equal.
 * The comparison is case sensitive.
 *   StringUtils.equals(null, null) = true
 *   StringUtils.equals(null, "abc") = false
 *   StringUtils.equals("abc", null) = false
 *   StringUtils.equals("abc", "abc") = true
 *   StringUtils.equals("abc", "ABC") = false
 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
 * Params:
 *   - cs1 – the first CharSequence, may be null
 *   - cs2 – the second CharSequence, may be null
 * See Also: Object.equals(Object)
 * Returns: true if the CharSequences are equal (case-sensitive), or both null
 * Since: 1.10
 */
/**
 * <p>
 * Tests whether two CharSequences hold exactly the same sequence of characters.
 * </p>
 *
 * <p>
 * The check is case sensitive and never throws for <code>null</code> arguments: two <code>null</code> references
 * are equal to each other, and a <code>null</code> is never equal to a non-<code>null</code> sequence.
 * </p>
 *
 * <pre>
 * StringUtils.equals(null, null)   = true
 * StringUtils.equals(null, "abc")  = false
 * StringUtils.equals("abc", null)  = false
 * StringUtils.equals("abc", "abc") = true
 * StringUtils.equals("abc", "ABC") = false
 * </pre>
 *
 * <p>
 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
 * </p>
 *
 * @see Object#equals(Object)
 * @param cs1
 *            the first CharSequence, may be <code>null</code>
 * @param cs2
 *            the second CharSequence, may be <code>null</code>
 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
 * @since 1.10
 */
public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
    // A null on either side can only ever match another null.
    if (cs1 == null || cs2 == null) {
        return cs1 == cs2;
    }
    // Same instance: nothing left to compare.
    if (cs1 == cs2) {
        return true;
    }
    // Two Strings can be compared directly with String.equals.
    final boolean bothStrings = cs1 instanceof String && cs2 instanceof String;
    if (bothStrings) {
        return cs1.equals(cs2);
    }
    // Mixed CharSequence types: lengths must agree before the characters are compared.
    if (cs1.length() != cs2.length()) {
        return false;
    }
    return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
}
/*
 * Calls String.getBytes(Charset)
 * Params:
 *   - string – The string to encode (if null, return null).
 *   - charset – The Charset to encode the String
 * Returns: the encoded bytes
 */
/**
 * Null-safe wrapper around {@link String#getBytes(Charset)}.
 *
 * @param string
 *            The string to encode; when <code>null</code>, <code>null</code> is returned.
 * @param charset
 *            The {@link Charset} used to encode the <code>String</code>
 * @return the encoded bytes, or <code>null</code> if <code>string</code> was <code>null</code>
 */
private static byte[] getBytes(final String string, final Charset charset) {
    return string == null ? null : string.getBytes(charset);
}
/*
 * Calls String.getBytes(Charset)
 * Params:
 *   - string – The string to encode (if null, return null).
 *   - charset – The Charset to encode the String
 * Returns: the encoded bytes
 */
/**
 * Encodes a string with {@link String#getBytes(Charset)} and wraps the resulting array in a {@link ByteBuffer}.
 *
 * @param string
 *            The string to encode; when <code>null</code>, <code>null</code> is returned.
 * @param charset
 *            The {@link Charset} used to encode the <code>String</code>
 * @return a buffer wrapping the encoded bytes, or <code>null</code> if <code>string</code> was <code>null</code>
 */
private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
    return string == null ? null : ByteBuffer.wrap(string.getBytes(charset));
}
/*
 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_8 is not initialized, which should never happen since it is
 *     required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: 1.11
 */
/**
 * Encodes the given string with the UTF-8 charset and returns the encoded bytes wrapped in a {@link ByteBuffer}.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return a buffer over the encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 * @since 1.11
 */
public static ByteBuffer getByteBufferUtf8(final String string) {
    final Charset utf8 = Charsets.UTF_8;
    return getByteBuffer(string, utf8);
}
/*
 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
 * byte array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.ISO_8859_1 is not initialized, which should never happen since
 *     it is required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its ISO-8859-1 byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the ISO-8859-1 encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesIso8859_1(final String string) {
    final Charset latin1 = Charsets.ISO_8859_1;
    return getBytes(string, latin1);
}
/*
 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
 * array.
 * This method catches UnsupportedEncodingException and rethrows it as IllegalStateException, which should never
 * happen for a required charset name. Use this method when the encoding is required to be in the JRE.
 * Params:
 *   - string – the String to encode, may be null
 *   - charsetName – The name of a required Charset
 * Throws:
 *   - IllegalStateException – Thrown when a UnsupportedEncodingException is caught, which should never happen
 *     for a required charset name.
 * See Also: CharEncoding, String.getBytes(String)
 * Returns: encoded bytes, or null if the input string was null
 */
/**
 * Converts the given string to bytes using the charset identified by name, returned as a new byte array.
 * <p>
 * An {@link UnsupportedEncodingException} raised for the charset name is caught and rethrown as an
 * {@link IllegalStateException}; this should never happen for a required charset name. Use this method when the
 * encoding is required to be in the JRE.
 * </p>
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @param charsetName
 *            The name of a required {@link java.nio.charset.Charset}
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws IllegalStateException
 *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
 *             required charset name.
 * @see CharEncoding
 * @see String#getBytes(String)
 */
public static byte[] getBytesUnchecked(final String string, final String charsetName) {
    byte[] encoded = null;
    // A null input short-circuits to null; the charset name is not looked at in that case.
    if (string != null) {
        try {
            encoded = string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException unsupported) {
            throw StringUtils.newIllegalStateException(charsetName, unsupported);
        }
    }
    return encoded;
}
/*
 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.US_ASCII is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its US-ASCII byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the US-ASCII encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUsAscii(final String string) {
    final Charset ascii = Charsets.US_ASCII;
    return getBytes(string, ascii);
}
/*
 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16 is not initialized, which should never happen since it is
 *     required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its UTF-16 byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the UTF-16 encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16(final String string) {
    final Charset utf16 = Charsets.UTF_16;
    return getBytes(string, utf16);
}
/*
 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16BE is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its UTF-16BE byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the UTF-16BE encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16Be(final String string) {
    final Charset utf16Be = Charsets.UTF_16BE;
    return getBytes(string, utf16Be);
}
/*
 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16LE is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its UTF-16LE byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the UTF-16LE encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16Le(final String string) {
    final Charset utf16Le = Charsets.UTF_16LE;
    return getBytes(string, utf16Le);
}
/*
 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
 * array.
 * Params:
 *   - string – the String to encode, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_8 is not initialized, which should never happen since it is
 *     required by the Java platform specification.
 * See Also: Standard charsets, getBytesUnchecked(String, String)
 * Returns: encoded bytes, or null if the input string was null
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Converts the given string to its UTF-8 byte representation, returned as a new byte array.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return the UTF-8 encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf8(final String string) {
    final Charset utf8 = Charsets.UTF_8;
    return getBytes(string, utf8);
}
/**
 * Builds the {@link IllegalStateException} used to report an {@link UnsupportedEncodingException} raised for a
 * charset name.
 *
 * <p>
 * The message is the charset name followed by the string form of the original exception. The original exception
 * is also attached as the cause, so its stack trace stays available to whoever handles the new exception.
 * </p>
 *
 * @param charsetName
 *            the charset name that was rejected
 * @param e
 *            the exception raised for that charset name
 * @return a new exception whose message is <code>charsetName + ": " + e</code> and whose cause is <code>e</code>
 */
private static IllegalStateException newIllegalStateException(final String charsetName,
        final UnsupportedEncodingException e) {
    // Pass e as the cause as well as folding it into the message; the message text is unchanged.
    return new IllegalStateException(charsetName + ": " + e, e);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the given charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 *   - charset – The Charset to encode the String; not null
 * Throws:
 *   - NullPointerException – Thrown if charset is null
 * Returns: A new String decoded from the specified array of bytes using the given charset,
 *   or null if the input byte array was null.
 */
/**
 * Decodes the given bytes into a new <code>String</code> using the given charset.
 *
 * @param bytes
 *            The bytes to be decoded into characters; when <code>null</code>, <code>null</code> is returned
 * @param charset
 *            The {@link Charset} used to decode the bytes; not {@code null}
 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if charset is {@code null}
 */
private static String newString(final byte[] bytes, final Charset charset) {
    if (bytes == null) {
        return null;
    }
    return new String(bytes, charset);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the given charset.
 * This method catches UnsupportedEncodingException and re-throws it as IllegalStateException, which should never
 * happen for a required charset name. Use this method when the encoding is required to be in the JRE.
 * Params:
 *   - bytes – The bytes to be decoded into characters, may be null
 *   - charsetName – The name of a required Charset
 * Throws:
 *   - IllegalStateException – Thrown when a UnsupportedEncodingException is caught, which should never happen
 *     for a required charset name.
 * See Also: CharEncoding, String.String(byte[], String)
 * Returns: A new String decoded from the specified array of bytes using the given charset,
 *   or null if the input byte array was null.
 */
/**
 * Decodes the given bytes into a new <code>String</code> using the charset identified by name.
 * <p>
 * An {@link UnsupportedEncodingException} raised for the charset name is caught and re-thrown as an
 * {@link IllegalStateException}; this should never happen for a required charset name. Use this method when the
 * encoding is required to be in the JRE.
 * </p>
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @param charsetName
 *            The name of a required {@link java.nio.charset.Charset}
 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws IllegalStateException
 *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
 *             required charset name.
 * @see CharEncoding
 * @see String#String(byte[], String)
 */
public static String newString(final byte[] bytes, final String charsetName) {
    String decoded = null;
    // A null input short-circuits to null; the charset name is not looked at in that case.
    if (bytes != null) {
        try {
            decoded = new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException unsupported) {
            throw StringUtils.newIllegalStateException(charsetName, unsupported);
        }
    }
    return decoded;
}
/*
 * Constructs a new String by decoding the specified array of bytes using the ISO-8859-1 charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters, may be null
 * Throws:
 *   - NullPointerException – Thrown if Charsets.ISO_8859_1 is not initialized, which should never happen since
 *     it is required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the ISO-8859-1 charset, or
 *   null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as ISO-8859-1 and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
 *         <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringIso8859_1(final byte[] bytes) {
    final Charset latin1 = Charsets.ISO_8859_1;
    return newString(bytes, latin1);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the US-ASCII charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 * Throws:
 *   - NullPointerException – Thrown if Charsets.US_ASCII is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the US-ASCII charset,
 *   or null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as US-ASCII and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters
 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUsAscii(final byte[] bytes) {
    final Charset ascii = Charsets.US_ASCII;
    return newString(bytes, ascii);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the UTF-16 charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16 is not initialized, which should never happen since it is
 *     required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the UTF-16 charset,
 *   or null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as UTF-16 and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters
 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUtf16(final byte[] bytes) {
    final Charset utf16 = Charsets.UTF_16;
    return newString(bytes, utf16);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the UTF-16BE charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16BE is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the UTF-16BE charset,
 *   or null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as UTF-16BE and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters
 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUtf16Be(final byte[] bytes) {
    final Charset utf16Be = Charsets.UTF_16BE;
    return newString(bytes, utf16Be);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the UTF-16LE charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_16LE is not initialized, which should never happen since it
 *     is required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the UTF-16LE charset,
 *   or null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as UTF-16LE and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters
 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUtf16Le(final byte[] bytes) {
    final Charset utf16Le = Charsets.UTF_16LE;
    return newString(bytes, utf16Le);
}
/*
 * Constructs a new String by decoding the specified array of bytes using the UTF-8 charset.
 * Params:
 *   - bytes – The bytes to be decoded into characters
 * Throws:
 *   - NullPointerException – Thrown if Charsets.UTF_8 is not initialized, which should never happen since it is
 *     required by the Java platform specification.
 * Returns: A new String decoded from the specified array of bytes using the UTF-8 charset,
 *   or null if the input byte array was null.
 * Since: As of 1.7, throws NullPointerException instead of UnsupportedEncodingException
 */
/**
 * Decodes the given bytes as UTF-8 and returns the result as a new <code>String</code>.
 *
 * @param bytes
 *            The bytes to be decoded into characters
 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUtf8(final byte[] bytes) {
    final Charset utf8 = Charsets.UTF_8;
    return newString(bytes, utf8);
}
}