/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import org.apache.commons.lang.exception.NestableRuntimeException;
import org.apache.commons.lang.text.StrBuilder;
Escapes and unescapes String
s for
Java, Java Script, HTML, XML, and SQL.
#ThreadSafe#
Author: Apache Software Foundation, Apache Jakarta Turbine, Purple Technology, Alexander Day Chaffee, Antony Riley, Helge Tesgaard, Sean Brown, Gary Gregory, Phil Steitz, Pete Gieser Since: 2.0 Version: $Id: StringEscapeUtils.java 1057072 2011-01-10 01:55:57Z niallp $
/**
* <p>Escapes and unescapes <code>String</code>s for
* Java, Java Script, HTML, XML, and SQL.</p>
*
* <p>#ThreadSafe#</p>
* @author Apache Software Foundation
* @author Apache Jakarta Turbine
* @author Purple Technology
* @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
* @author Antony Riley
* @author Helge Tesgaard
* @author <a href="sean@boohai.com">Sean Brown</a>
* @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
* @author Phil Steitz
* @author Pete Gieser
* @since 2.0
* @version $Id: StringEscapeUtils.java 1057072 2011-01-10 01:55:57Z niallp $
*/
public class StringEscapeUtils {
/** Comma; one of the characters searched for by the CSV escaping code (see CSV_SEARCH_CHARS). */
private static final char CSV_DELIMITER = ',';
/** Double quote; one of the characters searched for by the CSV escaping code (see CSV_SEARCH_CHARS). */
private static final char CSV_QUOTE = '"';
/** <code>String</code> form of {@link #CSV_QUOTE}. */
private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
/**
 * Characters looked for in a CSV value: comma, double quote, CR and LF.
 * <code>escapeCsv(String)</code> returns its input unchanged when the value
 * contains none of them.
 */
private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
StringEscapeUtils
instances should NOT be constructed in
standard programming.
Instead, the class should be used as:
StringEscapeUtils.escapeJava("foo");
This constructor is public to permit tools that require a JavaBean
instance to operate.
/**
 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
 * standard programming.</p>
 *
 * <p>Instead, the class should be used through its static methods, e.g.:</p>
 * <pre>StringEscapeUtils.escapeJava("foo");</pre>
 *
 * <p>This constructor is public to permit tools that require a JavaBean
 * instance to operate.</p>
 */
public StringEscapeUtils() {
super();
}
// Java and JavaScript
//--------------------------------------------------------------------------
Escapes the characters in a String
using Java String rules.
Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
So a tab becomes the characters '\\'
and
't'
.
The only difference between Java strings and JavaScript strings
is that in JavaScript, a single quote must be escaped.
Example:
input string: He didn't say, "Stop!"
output string: He didn't say, \"Stop!\"
Params: - str – String to escape values in, may be null
Returns: String with escaped values, null
if null string input
/**
 * <p>Escapes the characters of a <code>String</code> according to Java
 * string-literal rules.</p>
 *
 * <p>Quotes and control characters (tab, backslash, cr, ff, etc.) are
 * handled, so a tab becomes the two characters <code>'\\'</code> and
 * <code>'t'</code>.</p>
 *
 * <p>In contrast to {@link #escapeJavaScript(String)}, single quotes and
 * forward slashes are left unescaped.</p>
 *
 * <p>Example:</p>
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn't say, \"Stop!\"
 * </pre>
 *
 * @param str String to escape values in, may be null
 * @return String with escaped values, <code>null</code> if null string input
 */
public static String escapeJava(String str) {
    // Java rules: neither the single quote nor the forward slash is escaped.
    final boolean escapeSingleQuotes = false;
    final boolean escapeForwardSlash = false;
    return escapeJavaStyleString(str, escapeSingleQuotes, escapeForwardSlash);
}
Escapes the characters in a String
using Java String rules to
a Writer
.
A null
string input has no effect.
Params: - out – Writer to write escaped string into
- str – String to escape values in, may be null
Throws: - IllegalArgumentException – if the Writer is
null
- IOException – if error occurs on underlying Writer
See Also: - escapeJava(String)
/**
 * <p>Escapes the characters of a <code>String</code> according to Java
 * string-literal rules and sends the result to a <code>Writer</code>.</p>
 *
 * <p>Nothing is written when the string is <code>null</code>.</p>
 *
 * @see #escapeJava(java.lang.String)
 * @param out Writer to write escaped string into
 * @param str String to escape values in, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void escapeJava(Writer out, String str) throws IOException {
    // Java rules: neither the single quote nor the forward slash is escaped.
    final boolean escapeSingleQuote = false;
    final boolean escapeForwardSlash = false;
    escapeJavaStyleString(out, str, escapeSingleQuote, escapeForwardSlash);
}
Escapes the characters in a String
using JavaScript String rules.
Escapes any values it finds into their JavaScript String form.
Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
So a tab becomes the characters '\\'
and
't'
.
The only difference between Java strings and JavaScript strings
is that in JavaScript, a single quote must be escaped.
Example:
input string: He didn't say, "Stop!"
output string: He didn\'t say, \"Stop!\"
Params: - str – String to escape values in, may be null
Returns: String with escaped values, null
if null string input
/**
 * <p>Escapes the characters of a <code>String</code> according to
 * JavaScript string rules.</p>
 *
 * <p>Quotes and control characters (tab, backslash, cr, ff, etc.) are
 * handled, so a tab becomes the two characters <code>'\\'</code> and
 * <code>'t'</code>.</p>
 *
 * <p>Compared with {@link #escapeJava(String)}, this method additionally
 * escapes single quotes and forward slashes.</p>
 *
 * <p>Example:</p>
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn\'t say, \"Stop!\"
 * </pre>
 *
 * @param str String to escape values in, may be null
 * @return String with escaped values, <code>null</code> if null string input
 */
public static String escapeJavaScript(String str) {
    // JavaScript rules: both the single quote and the forward slash are escaped.
    final boolean escapeSingleQuotes = true;
    final boolean escapeForwardSlash = true;
    return escapeJavaStyleString(str, escapeSingleQuotes, escapeForwardSlash);
}
Escapes the characters in a String
using JavaScript String rules
to a Writer
.
A null
string input has no effect.
Params: - out – Writer to write escaped string into
- str – String to escape values in, may be null
Throws: - IllegalArgumentException – if the Writer is
null
- IOException – if error occurs on underlying Writer
See Also: - escapeJavaScript(String)
/**
 * <p>Escapes the characters of a <code>String</code> according to
 * JavaScript string rules and sends the result to a <code>Writer</code>.</p>
 *
 * <p>Nothing is written when the string is <code>null</code>.</p>
 *
 * @see #escapeJavaScript(java.lang.String)
 * @param out Writer to write escaped string into
 * @param str String to escape values in, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void escapeJavaScript(Writer out, String str) throws IOException {
    // JavaScript rules: both the single quote and the forward slash are escaped.
    final boolean escapeSingleQuote = true;
    final boolean escapeForwardSlash = true;
    escapeJavaStyleString(out, str, escapeSingleQuote, escapeForwardSlash);
}
Worker method for the escapeJavaScript(String)
method.
Params: - str – String to escape values in, may be null
- escapeSingleQuotes – escapes single quotes if
true
- escapeForwardSlash – TODO
Returns: the escaped string
/**
 * <p>Shared worker behind {@link #escapeJava(String)} and
 * {@link #escapeJavaScript(String)}: escapes into an in-memory buffer and
 * returns the buffer contents.</p>
 *
 * @param str String to escape values in, may be null
 * @param escapeSingleQuotes escapes single quotes if <code>true</code>
 * @param escapeForwardSlash escapes forward slashes (<code>'/'</code>) if <code>true</code>
 * @return the escaped string, <code>null</code> if null string input
 */
private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
    if (str != null) {
        // Start with room for twice the input length.
        StringWriter buffer = new StringWriter(str.length() * 2);
        try {
            escapeJavaStyleString(buffer, str, escapeSingleQuotes, escapeForwardSlash);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return buffer.toString();
    }
    return null;
}
Worker method for the escapeJavaScript(String)
method.
Params: - out – write to receieve the escaped string
- str – String to escape values in, may be null
- escapeSingleQuote – escapes single quotes if
true
- escapeForwardSlash – TODO
Throws: - IOException – if an IOException occurs
/**
 * <p>Shared worker behind the Java and JavaScript escaping methods; writes
 * the escaped form of <code>str</code> to <code>out</code> one character
 * at a time.</p>
 *
 * <ul>
 * <li>Characters above 0x7f, and control characters below 32 that have no
 * short escape, are written as a backslash, the letter <code>u</code> and
 * four upper case hexadecimal digits.</li>
 * <li>Backspace, newline, tab, form feed and carriage return are written
 * as a backslash followed by <code>b</code>, <code>n</code>,
 * <code>t</code>, <code>f</code> or <code>r</code>.</li>
 * <li>The double quote and the backslash are always prefixed with a
 * backslash; the single quote and the forward slash only when the
 * corresponding flag is set.</li>
 * <li>Every other character is written unchanged.</li>
 * </ul>
 *
 * @param out writer to receive the escaped string
 * @param str String to escape values in, may be null (nothing is written)
 * @param escapeSingleQuote escapes single quotes if <code>true</code>
 * @param escapeForwardSlash escapes forward slashes (<code>'/'</code>) if <code>true</code>
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if an IOException occurs
 */
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
        boolean escapeForwardSlash) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("The Writer must not be null");
    }
    if (str == null) {
        return;
    }
    final int length = str.length();
    for (int index = 0; index < length; index++) {
        final char c = str.charAt(index);

        // Step 1: classify the character.
        boolean asUnicode = c > 0x7f;
        char shortEscape = 0; // 0 means "no single-letter escape applies"
        if (!asUnicode && c < 32) {
            switch (c) {
                case '\b' :
                    shortEscape = 'b';
                    break;
                case '\n' :
                    shortEscape = 'n';
                    break;
                case '\t' :
                    shortEscape = 't';
                    break;
                case '\f' :
                    shortEscape = 'f';
                    break;
                case '\r' :
                    shortEscape = 'r';
                    break;
                default :
                    // remaining control characters have no short form
                    asUnicode = true;
                    break;
            }
        }

        // Step 2: emit it.
        if (asUnicode) {
            // hex(c) yields 1 to 4 digits; left-pad with zeros to exactly four.
            final String digits = hex(c);
            out.write("\\u" + "0000".substring(digits.length()) + digits);
        } else if (shortEscape != 0) {
            out.write('\\');
            out.write(shortEscape);
        } else {
            // Printable ASCII (32..0x7f): optionally prefixed with a backslash.
            final boolean needsBackslash = c == '"'
                    || c == '\\'
                    || (c == '\'' && escapeSingleQuote)
                    || (c == '/' && escapeForwardSlash);
            if (needsBackslash) {
                out.write('\\');
            }
            out.write(c);
        }
    }
}
Returns an upper case hexadecimal String
for the given
character.
Params: - ch – The character to convert.
Returns: An upper case hexadecimal String
/**
 * <p>Converts a character's numeric value to an upper case hexadecimal
 * <code>String</code>, without any zero padding.</p>
 *
 * @param ch The character to convert.
 * @return An upper case hexadecimal <code>String</code>
 */
private static String hex(char ch) {
    // A char widens to a non-negative int, so radix-16 formatting yields the plain hex digits.
    final String digits = Integer.toString(ch, 16);
    return digits.toUpperCase(Locale.ENGLISH);
}
Unescapes any Java literals found in the String
.
For example, it will turn a sequence of '\'
and
'n'
into a newline character, unless the '\'
is preceded by another '\'
.
Params: - str – the
String
to unescape, may be null
Returns: a new unescaped String
, null
if null string input
/**
 * <p>Unescapes any Java literals found in the <code>String</code> and
 * returns the result as a new <code>String</code>.</p>
 *
 * <p>For example, a <code>'\'</code> followed by <code>'n'</code> is turned
 * into a newline character, unless the <code>'\'</code> is itself preceded
 * by another <code>'\'</code>.</p>
 *
 * <p>The work is delegated to {@link #unescapeJava(Writer, String)} using
 * an in-memory <code>StringWriter</code>.</p>
 *
 * @param str the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 */
public static String unescapeJava(String str) {
    if (str != null) {
        // Start with room for the full input length.
        StringWriter buffer = new StringWriter(str.length());
        try {
            unescapeJava(buffer, str);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return buffer.toString();
    }
    return null;
}
Unescapes any Java literals found in the String
to a
Writer
.
For example, it will turn a sequence of '\'
and
'n'
into a newline character, unless the '\'
is preceded by another '\'
.
A null
string input has no effect.
Params: - out – the
Writer
used to output unescaped characters - str – the
String
to unescape, may be null
Throws: - IllegalArgumentException – if the Writer is
null
- IOException – if error occurs on underlying Writer
/**
 * <p>Unescapes any Java literals found in the <code>String</code> and writes
 * the result to a <code>Writer</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and
 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
 * is preceded by another <code>'\'</code>.</p>
 *
 * <p>Recognised sequences are a backslash followed by one of
 * <code>\ ' " r f t n b</code>, or by <code>u</code> and exactly four
 * characters that are parsed as a hexadecimal number. A backslash followed
 * by any other character yields that character without the backslash.</p>
 *
 * <p>Edge cases: a single backslash at the very end of the input is written
 * as-is; a unicode escape cut short by the end of the input (fewer than
 * four characters after the <code>u</code>) produces no output at all.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @param out the <code>Writer</code> used to output unescaped characters
 * @param str the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws NestableRuntimeException if the four characters of a unicode
 * escape cannot be parsed as a hexadecimal integer
 * @throws IOException if error occurs on underlying Writer
 */
public static void unescapeJava(Writer out, String str) throws IOException {
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
}
if (str == null) {
return;
}
int sz = str.length();
// Collects the characters that follow a backslash-u until four are available.
StrBuilder unicode = new StrBuilder(4);
// True when the previous character was an unconsumed backslash.
boolean hadSlash = false;
// True while the characters of a unicode escape are being collected.
boolean inUnicode = false;
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
if (inUnicode) {
// Collecting mode: every character, whatever it is, is appended
// to the pending unicode value.
unicode.append(ch);
if (unicode.length() == 4) {
// unicode now contains the four characters that should
// represent our unicode character in hexadecimal
try {
int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value);
// reset the state machine for the next character
unicode.setLength(0);
inUnicode = false;
hadSlash = false;
} catch (NumberFormatException nfe) {
throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
}
}
continue;
}
if (hadSlash) {
// handle an escaped value: the previous character was a backslash
hadSlash = false;
switch (ch) {
case '\\':
out.write('\\');
break;
case '\'':
out.write('\'');
break;
case '\"':
out.write('"');
break;
case 'r':
out.write('\r');
break;
case 'f':
out.write('\f');
break;
case 't':
out.write('\t');
break;
case 'n':
out.write('\n');
break;
case 'b':
out.write('\b');
break;
case 'u':
{
// start of a unicode escape: switch to collecting mode,
// nothing is written until four characters have been read
inUnicode = true;
break;
}
default :
// unknown escape: drop the backslash, keep the character
out.write(ch);
break;
}
continue;
} else if (ch == '\\') {
// remember the backslash; what it means depends on the next character
hadSlash = true;
continue;
}
// ordinary character: copy through unchanged
out.write(ch);
}
if (hadSlash) {
// then we're in the weird case of a \ at the end of the
// string, let's output it anyway.
out.write('\\');
}
}
Unescapes any JavaScript literals found in the String
.
For example, it will turn a sequence of '\'
and 'n'
into a newline character, unless the '\'
is preceded by another
'\'
.
Params: - str – the
String
to unescape, may be null
See Also: - unescapeJava(String)
Returns: A new unescaped String
, null
if null string input
/**
 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
 *
 * <p>For example, a <code>'\'</code> followed by <code>'n'</code> is turned
 * into a newline character, unless the <code>'\'</code> is itself preceded
 * by another <code>'\'</code>.</p>
 *
 * <p>This method applies exactly the same rules as
 * {@link #unescapeJava(String)}, to which it delegates.</p>
 *
 * @see #unescapeJava(String)
 * @param str the <code>String</code> to unescape, may be null
 * @return A new unescaped <code>String</code>, <code>null</code> if null string input
 */
public static String unescapeJavaScript(String str) {
    final String unescaped = unescapeJava(str);
    return unescaped;
}
Unescapes any JavaScript literals found in the String
to a
Writer
.
For example, it will turn a sequence of '\'
and 'n'
into a newline character, unless the '\'
is preceded by another
'\'
.
A null
string input has no effect.
Params: - out – the
Writer
used to output unescaped characters - str – the
String
to unescape, may be null
Throws: - IllegalArgumentException – if the Writer is
null
- IOException – if error occurs on underlying Writer
See Also: - unescapeJava(Writer, String)
/**
 * <p>Unescapes any JavaScript literals found in the <code>String</code> and
 * writes the result to a <code>Writer</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
 * into a newline character, unless the <code>'\'</code> is preceded by another
 * <code>'\'</code>.</p>
 *
 * <p>This method applies exactly the same rules as
 * {@link #unescapeJava(Writer,String)}, to which it delegates.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @see #unescapeJava(Writer,String)
 * @param out the <code>Writer</code> used to output unescaped characters
 * @param str the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void unescapeJavaScript(Writer out, String str) throws IOException {
unescapeJava(out, str);
}
// HTML and XML
//--------------------------------------------------------------------------
Escapes the characters in a String
using HTML entities.
For example:
"bread" & "butter"
becomes:
"bread" & "butter"
.
Supports all known HTML 4.0 entities, including funky accents.
Note that the commonly used apostrophe escape character (')
is not a legal entity and so is not supported).
Params: - str – the
String
to escape, may be null
See Also: Returns: a new escaped String
, null
if null string input
/**
 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
 *
 * <p>For example, the text</p>
 * <p><code>"bread" &amp; "butter"</code></p>
 * <p>becomes</p>
 * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.</p>
 *
 * <p>Supports all known HTML 4.0 entities, including funky accents.
 * Note that the commonly used apostrophe escape character (&amp;apos;)
 * is not a legal entity and so is not supported.</p>
 *
 * <p>The work is delegated to {@link #escapeHtml(Writer, String)} using an
 * in-memory <code>StringWriter</code>.</p>
 *
 * @param str the <code>String</code> to escape, may be null
 * @return a new escaped <code>String</code>, <code>null</code> if null string input
 *
 * @see #unescapeHtml(String)
 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
 */
public static String escapeHtml(String str) {
    if (str != null) {
        // Start with room for one and a half times the input length.
        final int initialCapacity = (int) (str.length() * 1.5);
        StringWriter buffer = new StringWriter(initialCapacity);
        try {
            escapeHtml(buffer, str);
        } catch (IOException ioe) {
            //should be impossible
            throw new UnhandledException(ioe);
        }
        return buffer.toString();
    }
    return null;
}
Escapes the characters in a String
using HTML entities and writes
them to a Writer
.
For example:
"bread" & "butter"
becomes:
"bread" & "butter"
.
Supports all known HTML 4.0 entities, including funky accents.
Note that the commonly used apostrophe escape character (')
is not a legal entity and so is not supported).
Params: - writer – the writer receiving the escaped string, not null
- string – the
String
to escape, may be null
Throws: - IllegalArgumentException – if the writer is null
- IOException – when
Writer
passed throws the exception from calls to the Writer.write(int)
methods.
See Also:
/**
 * <p>Escapes the characters in a <code>String</code> using HTML entities and
 * writes them to a <code>Writer</code>.</p>
 *
 * <p>For example, the text</p>
 * <p><code>"bread" &amp; "butter"</code></p>
 * <p>becomes</p>
 * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.</p>
 *
 * <p>Supports all known HTML 4.0 entities, including funky accents.
 * Note that the commonly used apostrophe escape character (&amp;apos;)
 * is not a legal entity and so is not supported.</p>
 *
 * <p>The escaping itself is performed by <code>Entities.HTML40</code>.
 * A <code>null</code> string input has no effect.</p>
 *
 * @param writer the writer receiving the escaped string, not null
 * @param string the <code>String</code> to escape, may be null
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException when <code>Writer</code> passed throws the exception from
 * calls to the {@link Writer#write(int)} methods.
 *
 * @see #escapeHtml(String)
 * @see #unescapeHtml(String)
 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
 */
public static void escapeHtml(Writer writer, String string) throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // The writer is validated first, so a null writer fails even for null input.
    if (string != null) {
        Entities.HTML40.escape(writer, string);
    }
}
//-----------------------------------------------------------------------
Unescapes a string containing entity escapes to a string
containing the actual Unicode characters corresponding to the
escapes. Supports HTML 4.0 entities.
For example, the string "<Français>"
will become "<Français>"
If an entity is unrecognized, it is left alone, and inserted
verbatim into the result string. e.g. ">&zzzz;x" will
become ">&zzzz;x".
Params: - str – the
String
to unescape, may be null
See Also: Returns: a new unescaped String
, null
if null string input
/**
 * <p>Unescapes a string containing entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes. Supports HTML 4.0 entities.</p>
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
 * will become "&lt;Fran&ccedil;ais&gt;".</p>
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted
 * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
 * become "&gt;&amp;zzzz;x".</p>
 *
 * <p>The work is delegated to {@link #unescapeHtml(Writer, String)} using
 * an in-memory <code>StringWriter</code>.</p>
 *
 * @param str the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 * @see #escapeHtml(Writer, String)
 */
public static String unescapeHtml(String str) {
    if (str != null) {
        // Start with room for one and a half times the input length.
        final int initialCapacity = (int) (str.length() * 1.5);
        StringWriter buffer = new StringWriter(initialCapacity);
        try {
            unescapeHtml(buffer, str);
        } catch (IOException ioe) {
            //should be impossible
            throw new UnhandledException(ioe);
        }
        return buffer.toString();
    }
    return null;
}
Unescapes a string containing entity escapes to a string
containing the actual Unicode characters corresponding to the
escapes. Supports HTML 4.0 entities.
For example, the string "<Français>"
will become "<Français>"
If an entity is unrecognized, it is left alone, and inserted
verbatim into the result string. e.g. ">&zzzz;x" will
become ">&zzzz;x".
Params: - writer – the writer receiving the unescaped string, not null
- string – the
String
to unescape, may be null
Throws: - IllegalArgumentException – if the writer is null
- IOException – if an IOException occurs
See Also:
/**
 * <p>Unescapes a string containing entity escapes and writes the actual
 * Unicode characters corresponding to the escapes to a
 * <code>Writer</code>. Supports HTML 4.0 entities.</p>
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
 * will become "&lt;Fran&ccedil;ais&gt;".</p>
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted
 * verbatim into the result. e.g. "&amp;gt;&amp;zzzz;x" will
 * become "&gt;&amp;zzzz;x".</p>
 *
 * <p>The unescaping itself is performed by <code>Entities.HTML40</code>.
 * A <code>null</code> string input has no effect.</p>
 *
 * @param writer the writer receiving the unescaped string, not null
 * @param string the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if an IOException occurs
 * @see #escapeHtml(String)
 */
public static void unescapeHtml(Writer writer, String string) throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // The writer is validated first, so a null writer fails even for null input.
    if (string != null) {
        Entities.HTML40.unescape(writer, string);
    }
}
//-----------------------------------------------------------------------
Escapes the characters in a String
using XML entities.
For example: "bread" & "butter" =>
"bread" & "butter".
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
Does not support DTDs or external entities.
Note that unicode characters greater than 0x7f are currently escaped to
their numerical \\u equivalent. This may change in future releases.
Params: - writer – the writer receiving the unescaped string, not null
- str – the
String
to escape, may be null
Throws: - IllegalArgumentException – if the writer is null
- IOException – if there is a problem writing
See Also:
/**
 * <p>Escapes the characters in a <code>String</code> using XML entities and
 * writes the result to a <code>Writer</code>.</p>
 *
 * <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
 * </p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that unicode characters greater than 0x7f are currently escaped
 * to a numerical equivalent; the exact form is decided by
 * <code>Entities.XML</code>, which performs the escaping. This may change
 * in future releases.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @param writer the writer receiving the escaped string, not null
 * @param str the <code>String</code> to escape, may be null
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if there is a problem writing
 * @see #unescapeXml(java.lang.String)
 */
public static void escapeXml(Writer writer, String str) throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // The writer is validated first, so a null writer fails even for null input.
    if (str != null) {
        Entities.XML.escape(writer, str);
    }
}
Escapes the characters in a String
using XML entities.
For example: "bread" & "butter" =>
"bread" & "butter".
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
Does not support DTDs or external entities.
Note that unicode characters greater than 0x7f are currently escaped to
their numerical \\u equivalent. This may change in future releases.
Params: - str – the
String
to escape, may be null
See Also: Returns: a new escaped String
, null
if null string input
/**
 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
 *
 * <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
 * </p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that unicode characters greater than 0x7f are currently escaped
 * to a numerical equivalent; the exact form is decided by
 * <code>Entities.XML</code>, which performs the escaping. This may change
 * in future releases.</p>
 *
 * @param str the <code>String</code> to escape, may be null
 * @return a new escaped <code>String</code>, <code>null</code> if null string input
 * @see #unescapeXml(java.lang.String)
 */
public static String escapeXml(String str) {
    return (str == null) ? null : Entities.XML.escape(str);
}
//-----------------------------------------------------------------------
Unescapes a string containing XML entity escapes to a string
containing the actual Unicode characters corresponding to the
escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
Does not support DTDs or external entities.
Note that numerical \\u unicode codes are unescaped to their respective
unicode characters. This may change in future releases.
Params: - writer – the writer receiving the unescaped string, not null
- str – the
String
to unescape, may be null
Throws: - IllegalArgumentException – if the writer is null
- IOException – if there is a problem writing
See Also:
/**
 * <p>Unescapes a string containing XML entity escapes and writes the
 * actual Unicode characters corresponding to the escapes to a
 * <code>Writer</code>.</p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that numerical unicode codes are unescaped to their respective
 * unicode characters; the exact forms accepted are decided by
 * <code>Entities.XML</code>, which performs the unescaping. This may
 * change in future releases.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @param writer the writer receiving the unescaped string, not null
 * @param str the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if there is a problem writing
 * @see #escapeXml(String)
 */
public static void unescapeXml(Writer writer, String str) throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // The writer is validated first, so a null writer fails even for null input.
    if (str != null) {
        Entities.XML.unescape(writer, str);
    }
}
Unescapes a string containing XML entity escapes to a string
containing the actual Unicode characters corresponding to the
escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
Does not support DTDs or external entities.
Note that numerical \\u unicode codes are unescaped to their respective
unicode characters. This may change in future releases.
Params: - str – the
String
to unescape, may be null
See Also: Returns: a new unescaped String
, null
if null string input
/**
 * <p>Unescapes a string containing XML entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes.</p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that numerical unicode codes are unescaped to their respective
 * unicode characters; the exact forms accepted are decided by
 * <code>Entities.XML</code>, which performs the unescaping. This may
 * change in future releases.</p>
 *
 * @param str the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 * @see #escapeXml(String)
 */
public static String unescapeXml(String str) {
    return (str == null) ? null : Entities.XML.unescape(str);
}
//-----------------------------------------------------------------------
Escapes the characters in a String
to be suitable to pass to
an SQL query.
For example,
statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
StringEscapeUtils.escapeSql("McHale's Navy") +
"'");
At present, this method only turns single-quotes into doubled single-quotes
("McHale's Navy"
=> "McHale''s Navy"
). It does not
handle the cases of percent (%) or underscore (_) for use in LIKE clauses.
see http://www.jguru.com/faq/view.jsp?EID=8881
Params: - str – the string to escape, may be null
Returns: a new String, escaped for SQL, null
if null string input
/**
 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
 * an SQL query.</p>
 *
 * <p>For example,</p>
 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
 * StringEscapeUtils.escapeSql("McHale's Navy") +
 * "'");</pre>
 *
 * <p>At present, this method only turns single-quotes into doubled single-quotes
 * (<code>"McHale's Navy"</code> =&gt; <code>"McHale''s Navy"</code>). It does not
 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
 *
 * <p>See http://www.jguru.com/faq/view.jsp?EID=8881</p>
 *
 * @param str the string to escape, may be null
 * @return a new String, escaped for SQL, <code>null</code> if null string input
 */
public static String escapeSql(String str) {
    // Only the single quote is treated: each one is doubled.
    return (str == null) ? null : StringUtils.replace(str, "'", "''");
}
//-----------------------------------------------------------------------
Returns a String
value for a CSV column enclosed in double quotes,
if required.
If the value contains a comma, newline or double quote, then the
String value is returned enclosed in double quotes.
Any double quote characters in the value are escaped with another double quote.
If the value does not contain a comma, newline or double quote, then the
String value is returned unchanged.
see Wikipedia and
RFC 4180.
Params: - str – the input CSV column String, may be null
Returns: the input String, enclosed in double quotes if the value contains a comma,
newline or double quote, null
if null string input Since: 2.4
/**
* <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
* if required.</p>
*
* <p>If the value contains a comma, newline or double quote, then the
* String value is returned enclosed in double quotes.</p>
* </p>
*
* <p>Any double quote characters in the value are escaped with another double quote.</p>
*
* <p>If the value does not contain a comma, newline or double quote, then the
* String value is returned unchanged.</p>
* </p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param str the input CSV column String, may be null
* @return the input String, enclosed in double quotes if the value contains a comma,
* newline or double quote, <code>null</code> if null string input
* @since 2.4
*/
public static String escapeCsv(String str) {
    // Fast path: nothing in the value forces quoting, so hand back the input as-is.
    final boolean needsQuoting = !StringUtils.containsNone(str, CSV_SEARCH_CHARS);
    if (!needsQuoting) {
        return str;
    }
    // Delegate to the Writer-based variant, collecting the output in memory.
    final StringWriter buffer = new StringWriter();
    try {
        escapeCsv(buffer, str);
    } catch (IOException unexpected) {
        // An in-memory StringWriter is not expected to fail; surface it unchecked if it does.
        throw new UnhandledException(unexpected);
    }
    return buffer.toString();
}
Writes a String
value for a CSV column enclosed in double quotes,
if required.
If the value contains a comma, newline or double quote, then the
String value is written enclosed in double quotes.
Any double quote characters in the value are escaped with another double quote.
If the value does not contain a comma, newline or double quote, then the
String value is written unchanged (null values are ignored).
see Wikipedia and
RFC 4180.
Params: - str – the input CSV column String, may be null
- out – Writer to write input string to, enclosed in double quotes if it contains
a comma, newline or double quote
Throws: - IOException – if error occurs on underlying Writer
Since: 2.4
/**
* <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
* if required.</p>
*
* <p>If the value contains a comma, newline or double quote, then the
* String value is written enclosed in double quotes.</p>
*
* <p>Any double quote characters in the value are escaped with another double quote.</p>
*
* <p>If the value does not contain a comma, newline or double quote, then the
* String value is written unchanged (null values are ignored).</p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param str the input CSV column String, may be null
* @param out Writer to write input string to, enclosed in double quotes if it contains
* a comma, newline or double quote
* @throws IOException if error occurs on underlying Writer
* @since 2.4
*/
public static void escapeCsv(Writer out, String str) throws IOException {
    if (!StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
        // The value holds at least one special character: wrap it in quotes
        // and double every embedded quote character.
        out.write(CSV_QUOTE);
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            final char current = str.charAt(pos++);
            if (current == CSV_QUOTE) {
                out.write(CSV_QUOTE); // escape double quote
            }
            out.write(current);
        }
        out.write(CSV_QUOTE);
    } else if (str != null) {
        // No special characters: emit the value verbatim (a null value writes nothing).
        out.write(str);
    }
}
Returns a String
value for an unescaped CSV column.
If the value is enclosed in double quotes, and contains a comma, newline
or double quote, then quotes are removed.
Any double quote escaped characters (a pair of double quotes) are unescaped
to just one double quote.
If the value is not enclosed in double quotes, or is and does not contain a
comma, newline or double quote, then the String value is returned unchanged.
see Wikipedia and
RFC 4180.
Params: - str – the input CSV column String, may be null
Returns: the input String, with enclosing double quotes removed and embedded double
quotes unescaped, null
if null string input Since: 2.4
/**
* <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
*
* <p>If the value is enclosed in double quotes, and contains a comma, newline
* or double quote, then quotes are removed.
* </p>
*
* <p>Any double quote escaped characters (a pair of double quotes) are unescaped
* to just one double quote. </p>
*
* <p>If the value is not enclosed in double quotes, or is and does not contain a
* comma, newline or double quote, then the String value is returned unchanged.</p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param str the input CSV column String, may be null
* @return the input String, with enclosing double quotes removed and embedded double
* quotes unescaped, <code>null</code> if null string input
* @since 2.4
*/
public static String unescapeCsv(String str) {
    if (str != null) {
        // Delegate to the Writer-based variant, collecting the output in memory.
        final StringWriter buffer = new StringWriter();
        try {
            unescapeCsv(buffer, str);
        } catch (IOException unexpected) {
            // An in-memory StringWriter is not expected to fail; surface it unchecked if it does.
            throw new UnhandledException(unexpected);
        }
        return buffer.toString();
    }
    // Null in, null out.
    return null;
}
Returns a String
value for an unescaped CSV column.
If the value is enclosed in double quotes, and contains a comma, newline
or double quote, then quotes are removed.
Any double quote escaped characters (a pair of double quotes) are unescaped
to just one double quote.
If the value is not enclosed in double quotes, or is and does not contain a
comma, newline or double quote, then the String value is returned unchanged.
see Wikipedia and
RFC 4180.
Params: - str – the input CSV column String, may be null
- out – Writer to write the input String to, with enclosing double quotes
removed and embedded double quotes unescaped,
null
if null string input
Throws: - IOException – if error occurs on underlying Writer
Since: 2.4
/**
* <p>Writes the unescaped value of a CSV column <code>String</code> to a <code>Writer</code>.</p>
*
* <p>If the value is enclosed in double quotes, and contains a comma, newline
* or double quote, then quotes are removed.
* </p>
*
* <p>Any double quote escaped characters (a pair of double quotes) are unescaped
* to just one double quote. </p>
*
* <p>If the value is not enclosed in double quotes, or is and does not contain a
* comma, newline or double quote, then the String value is written unchanged.</p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param str the input CSV column String, may be null
* @param out Writer to write the input String to, with enclosing double quotes
* removed and embedded double quotes unescaped; nothing is written if the input string is null
* @throws IOException if error occurs on underlying Writer
* @since 2.4
*/
public static void unescapeCsv(Writer out, String str) throws IOException {
    if (str == null) {
        return;
    }
    final int length = str.length();
    // A value counts as quoted only when it has at least two characters and both
    // the first and the last one are the quote character.
    final boolean quoted = length >= 2
            && str.charAt(0) == CSV_QUOTE
            && str.charAt(length - 1) == CSV_QUOTE;

    String result = str;
    if (quoted) {
        final String inner = str.substring(1, length - 1);
        // Quotes are stripped only when the enclosed text contains one of the
        // CSV search characters; otherwise the original text (with quotes) is kept.
        if (StringUtils.containsAny(inner, CSV_SEARCH_CHARS)) {
            // deal with escaped quotes; ie) ""
            result = StringUtils.replace(inner, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
        }
    }
    out.write(result);
}
}