package org.jsoup.internal;

import org.jsoup.helper.Validate;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Stack;

A minimal String utility class. Designed for internal jsoup use only.
/**
 * A minimal String utility class. Designed for internal jsoup use only.
 */
public final class StringUtil {
    /**
     * Memoised space padding: {@code padding[i]} is a string of exactly {@code i} spaces, for widths 0 to 20
     * (21 entries). The table is filled by the static initializer below rather than spelled out as literals, so
     * the width of each entry cannot be corrupted by whitespace-collapsing tools.
     */
    static final String[] padding = new String[21];

    static {
        final StringBuilder spaces = new StringBuilder(padding.length);
        for (int width = 0; width < padding.length; width++) {
            padding[width] = spaces.toString(); // holds exactly 'width' spaces at this point
            spaces.append(' ');
        }
    }
Join a collection of strings by a separator
Params:
  • strings – collection of string objects
  • sep – string to place between strings
Returns:joined string
/**
 * Join a collection of strings by a separator.
 * <p>
 * Delegates to {@link #join(Iterator, String)} using the collection's iterator.
 *
 * @param strings collection of string objects
 * @param sep string to place between strings
 * @return joined string
 */
public static String join(Collection<?> strings, String sep) {
    return join(strings.iterator(), sep);
}
Join a collection of strings by a separator
Params:
  • strings – iterator of string objects
  • sep – string to place between strings
Returns:joined string
/** * Join a collection of strings by a separator * @param strings iterator of string objects * @param sep string to place between strings * @return joined string */
public static String join(Iterator strings, String sep) { if (!strings.hasNext()) return ""; String start = strings.next().toString(); if (!strings.hasNext()) // only one, avoid builder return start; StringBuilder sb = StringUtil.borrowBuilder().append(start); while (strings.hasNext()) { sb.append(sep); sb.append(strings.next()); } return StringUtil.releaseBuilder(sb); }
Join an array of strings by a separator
Params:
  • strings – collection of string objects
  • sep – string to place between strings
Returns:joined string
/**
 * Join an array of strings by a separator.
 *
 * @param strings array of string objects
 * @param sep string to place between strings
 * @return joined string
 */
public static String join(String[] strings, String sep) {
    // view the array as a collection and reuse the collection-based join
    final Collection<String> items = Arrays.asList(strings);
    return join(items, sep);
}
Returns space padding
Params:
  • width – amount of padding desired
Returns:string of spaces * width
/**
 * Returns space padding.
 * <p>
 * Widths smaller than the length of the memoised {@code padding} table are served from that table; larger
 * widths are built on demand.
 *
 * @param width amount of padding desired; must not be negative (zero is allowed)
 * @return string of spaces * width
 * @throws IllegalArgumentException if {@code width} is negative
 */
public static String padding(int width) {
    if (width < 0) {
        throw new IllegalArgumentException("width must be >= 0");
    }
    if (width < padding.length) {
        return padding[width];
    }

    char[] out = new char[width];
    Arrays.fill(out, ' ');
    return String.valueOf(out);
}
Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
Params:
  • string – string to test
Returns:if string is blank
/**
 * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc), where whitespace is
 * decided by {@link #isWhitespace(int)}.
 *
 * @param string string to test
 * @return if string is blank
 */
public static boolean isBlank(String string) {
    if (string == null) {
        return true;
    }

    final int length = string.length();
    int index = 0;
    // an empty string never enters the loop and is therefore blank
    while (index < length) {
        if (!StringUtil.isWhitespace(string.codePointAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
Tests if a string is numeric, i.e. contains only digit characters
Params:
  • string – string to test
Returns:true if only digit chars, false if empty or null or contains non-digit chars
/**
 * Tests if a string is numeric, i.e. contains only digit characters as decided by
 * {@link Character#isDigit(int)}.
 * <p>
 * The string is walked one code point at a time, so digits outside the Basic Multilingual Plane (which
 * occupy two {@code char}s) are tested as a single digit rather than as two separate surrogates.
 *
 * @param string string to test
 * @return true if only digit chars, false if empty or null or contains non-digit chars
 */
public static boolean isNumeric(String string) {
    if (string == null || string.isEmpty()) {
        return false;
    }

    final int length = string.length();
    int codePoint;
    for (int i = 0; i < length; i += Character.charCount(codePoint)) {
        codePoint = string.codePointAt(i);
        if (!Character.isDigit(codePoint)) {
            return false;
        }
    }
    return true;
}
Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
Params:
  • c – code point to test
See Also: isActuallyWhitespace(int)
Returns:true if code point is whitespace, false otherwise
/**
 * Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
 * <p>
 * Only space, tab, line feed, form feed and carriage return qualify.
 *
 * @param c code point to test
 * @return true if code point is whitespace, false otherwise
 * @see #isActuallyWhitespace(int)
 */
public static boolean isWhitespace(int c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}
Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
Params:
  • c – code point to test
Returns:true if code point is whitespace, false otherwise
/** * Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc. * @param c code point to test * @return true if code point is whitespace, false otherwise */
public static boolean isActuallyWhitespace(int c){ return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160; // 160 is &nbsp; (non-breaking space). Not in the spec but expected. } public static boolean isInvisibleChar(int c) { return c == 8203 || c == 173; // zero width sp, soft hyphen // previously also included zw non join, zw join - but removing those breaks semantic meaning of text }
Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters (e.g. newline, tab) convert to a simple space
Params:
  • string – content to normalise
Returns:normalised string
/**
 * Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace
 * characters (e.g. newline, tab) convert to a simple space. Leading whitespace is not stripped.
 *
 * @param string content to normalise
 * @return normalised string
 */
public static String normaliseWhitespace(String string) {
    final StringBuilder normalised = borrowBuilder();
    appendNormalisedWhitespace(normalised, string, false);
    // releasing hands the builder back and returns what it accumulated
    return releaseBuilder(normalised);
}
After normalizing the whitespace within a string, appends it to a string builder.
Params:
  • accum – builder to append to
  • string – string to normalize whitespace within
  • stripLeading – set to true if you wish to remove any leading whitespace
/**
 * After normalizing the whitespace within a string, appends it to a string builder.
 * <p>
 * Runs of whitespace (per {@link #isActuallyWhitespace(int)}) are emitted as one space, and characters
 * matched by {@link #isInvisibleChar(int)} are dropped.
 *
 * @param accum builder to append to
 * @param string string to normalize whitespace within
 * @param stripLeading set to true if you wish to remove any leading whitespace
 */
public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
    boolean previousWasSpace = false;
    boolean seenContent = false;
    final int length = string.length();

    int pos = 0;
    while (pos < length) {
        final int codePoint = string.codePointAt(pos);
        pos += Character.charCount(codePoint); // step over both chars of a surrogate pair

        if (isActuallyWhitespace(codePoint)) {
            final boolean leadingToStrip = stripLeading && !seenContent;
            if (!leadingToStrip && !previousWasSpace) {
                accum.append(' ');
                previousWasSpace = true;
            }
        } else if (!isInvisibleChar(codePoint)) {
            accum.appendCodePoint(codePoint);
            previousWasSpace = false;
            seenContent = true;
        }
    }
}

/**
 * Tests if a string equals any of the supplied candidates.
 *
 * @param needle string to look for
 * @param haystack candidate strings to compare against
 * @return true if some candidate equals the needle, false otherwise
 */
public static boolean in(final String needle, final String... haystack) {
    for (final String candidate : haystack) {
        if (candidate.equals(needle)) {
            return true;
        }
    }
    return false;
}

/**
 * Tests if a string is present in an array, using a binary search.
 *
 * @param needle string to look for
 * @param haystack array to search; binary search requires it to be sorted
 * @return true if the binary search finds the needle, false otherwise
 */
public static boolean inSorted(String needle, String[] haystack) {
    final int index = Arrays.binarySearch(haystack, needle);
    return index >= 0;
}
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params:
  • base – the existing absolute base URL
  • relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Throws:
  • MalformedURLException – if an error occurred generating the URL
Returns:the resolved absolute URL
/** * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. * @param base the existing absolute base URL * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) * @return the resolved absolute URL * @throws MalformedURLException if an error occurred generating the URL */
public static URL resolve(URL base, String relUrl) throws MalformedURLException { // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired if (relUrl.startsWith("?")) relUrl = base.getPath() + relUrl; // workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo if (relUrl.indexOf('.') == 0 && base.getFile().indexOf('/') != 0) { base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile()); } return new URL(base, relUrl); }
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params:
  • baseUrl – the existing absolute base URL
  • relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Returns:an absolute URL if one was able to be generated, or the empty string if not
/** * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. * @param baseUrl the existing absolute base URL * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) * @return an absolute URL if one was able to be generated, or the empty string if not */
public static String resolve(final String baseUrl, final String relUrl) { URL base; try { try { base = new URL(baseUrl); } catch (MalformedURLException e) { // the base is unsuitable, but the attribute/rel may be abs on its own, so try that URL abs = new URL(relUrl); return abs.toExternalForm(); } return resolve(base, relUrl).toExternalForm(); } catch (MalformedURLException e) { return ""; } } private static final Stack<StringBuilder> builders = new Stack<>();
Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is prevented from growing too large.

Care must be taken to release the builder once its work has been completed, with releaseBuilder

Returns:an empty StringBuilder
/**
 * Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
 * prevented from growing too large.
 * <p>
 * Care must be taken to release the builder once its work has been completed, with {@link #releaseBuilder}
 *
 * @return an empty StringBuilder
 */
public static StringBuilder borrowBuilder() {
    synchronized (builders) {
        if (builders.empty()) {
            // nothing idle to reuse: hand out a fresh builder
            return new StringBuilder(MaxCachedBuilderSize);
        }
        return builders.pop();
    }
}
Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its contents may be changed by this method, or by a concurrent thread.
Params:
  • sb – the StringBuilder to release.
Returns:the string value of the released String Builder (as an incentive to release it!).
/** * Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its * contents may be changed by this method, or by a concurrent thread. * @param sb the StringBuilder to release. * @return the string value of the released String Builder (as an incentive to release it!). */
public static String releaseBuilder(StringBuilder sb) { Validate.notNull(sb); String string = sb.toString(); if (sb.length() > MaxCachedBuilderSize) sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big else sb.delete(0, sb.length()); // make sure it's emptied on release synchronized (builders) { builders.push(sb); while (builders.size() > MaxIdleBuilders) { builders.pop(); } } return string; } private static final int MaxCachedBuilderSize = 8 * 1024; private static final int MaxIdleBuilders = 8; }