/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang3;
Operations on CharSequence that are null safe.
See Also: CharSequence
Since: 3.0
/**
 * <p>Operations on {@link CharSequence} that are
 * {@code null} safe.</p>
 *
 * @see CharSequence
 * @since 3.0
 */
public class CharSequenceUtils {

    /** Value returned by the search methods when nothing matches. */
    private static final int NOT_FOUND = -1;

    /**
     * <p>{@code CharSequenceUtils} instances should NOT be constructed in
     * standard programming.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public CharSequenceUtils() {
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Returns a new {@code CharSequence} that is a subsequence of this
     * sequence starting with the {@code char} value at the specified index.</p>
     *
     * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
     * The length (in {@code char}) of the returned sequence is {@code length() - start},
     * so if {@code start == length()} then an empty sequence is returned.</p>
     *
     * @param cs  the specified subsequence, null returns null
     * @param start  the start index, inclusive, valid
     * @return a new subsequence, may be null
     * @throws IndexOutOfBoundsException if {@code start} is negative or if
     *  {@code start} is greater than {@code length()}
     */
    public static CharSequence subSequence(final CharSequence cs, final int start) {
        return cs == null ? null : cs.subSequence(start, cs.length());
    }

    //-----------------------------------------------------------------------
    /**
     * Returns the index within <code>cs</code> of the first occurrence of the
     * specified character, starting the search at the specified index.
     * <p>
     * If a character with value <code>searchChar</code> occurs in the
     * character sequence represented by the <code>cs</code>
     * object at an index no smaller than <code>start</code>, then
     * the index of the first such occurrence is returned. For values
     * of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive),
     * this is the smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. For other values of <code>searchChar</code>, it is the
     * smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in <code>cs</code>
     * at or after position <code>start</code>, then
     * <code>-1</code> is returned.
     *
     * <p>
     * There is no restriction on the value of <code>start</code>. If it
     * is negative, it has the same effect as if it were zero: the entire
     * <code>CharSequence</code> may be searched. If it is greater than
     * the length of <code>cs</code>, it has the same effect as if it were
     * equal to the length of <code>cs</code>: <code>-1</code> is returned.
     *
     * <p>All indices are specified in <code>char</code> values
     * (Unicode code units). A <code>searchChar</code> that is negative or
     * greater than {@link Character#MAX_CODE_POINT} never matches and yields
     * <code>-1</code>.
     *
     * @param cs  the {@code CharSequence} to be processed, not null
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative starts at the string start
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like <code>String</code>
     */
    static int indexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).indexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            start = 0;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            // Single code unit (or a negative value, which no char can equal).
            for (int i = start; i < sz; i++) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // Stop here: falling through would hand a negative value to
            // Character.toChars (which throws) and rescan BMP values for nothing.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // A surrogate pair occupies two code units, so the last candidate is sz - 2.
            for (int i = start; i < sz - 1; i++) {
                if (cs.charAt(i) == chars[0] && cs.charAt(i + 1) == chars[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the {@code indexOf(CharSequence)} family of methods as a green
     * implementation of indexOf. Both arguments are converted with
     * {@code toString()} and the search is delegated to
     * {@link String#indexOf(String, int)}.
     *
     * @param cs  the {@code CharSequence} to be processed
     * @param searchChar  the {@code CharSequence} to be searched for
     * @param start  the start index
     * @return the index where the search sequence was found
     */
    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        return cs.toString().indexOf(searchChar.toString(), start);
    }

    /**
     * Returns the index within <code>cs</code> of the last occurrence of
     * the specified character, searching backward starting at the
     * specified index. For values of <code>searchChar</code> in the range
     * from 0 to 0xFFFF (inclusive), the index returned is the largest
     * value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. For other values of <code>searchChar</code>, it is the
     * largest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in <code>cs</code>
     * at or before position <code>start</code>, then <code>-1</code> is returned.
     *
     * <p>All indices are specified in <code>char</code> values
     * (Unicode code units). A <code>searchChar</code> that is negative or
     * greater than {@link Character#MAX_CODE_POINT} never matches and yields
     * <code>-1</code>.
     *
     * @param cs  the {@code CharSequence} to be processed, not null
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative returns -1, beyond length starts at end
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like <code>String</code>
     */
    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).lastIndexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            return NOT_FOUND;
        }
        if (start >= sz) {
            start = sz - 1;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            // Single code unit (or a negative value, which no char can equal).
            for (int i = start; i >= 0; --i) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // Stop here: falling through would hand a negative value to
            // Character.toChars (which throws) and rescan BMP values for nothing.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // A surrogate pair needs two code units, so the latest position a match
            // can begin at is sz - 2. Clamp instead of giving up, mirroring
            // String.lastIndexOf, so a pair at the very end is still found.
            for (int i = Math.min(start, sz - 2); i >= 0; i--) {
                if (cs.charAt(i) == chars[0] && cs.charAt(i + 1) == chars[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the {@code lastIndexOf(CharSequence)} family of methods as a green
     * implementation of lastIndexOf. Both arguments are converted with
     * {@code toString()} and the search is delegated to
     * {@link String#lastIndexOf(String, int)}.
     *
     * @param cs  the {@code CharSequence} to be processed
     * @param searchChar  the {@code CharSequence} to be searched for
     * @param start  the start index
     * @return the index where the search sequence was found
     */
    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        return cs.toString().lastIndexOf(searchChar.toString(), start);
    }

    /**
     * Green implementation of toCharArray.
     *
     * @param cs  the {@code CharSequence} to be processed, not null
     * @return the resulting char array
     */
    static char[] toCharArray(final CharSequence cs) {
        if (cs instanceof String) {
            return ((String) cs).toCharArray();
        }
        final int sz = cs.length();
        final char[] array = new char[sz];
        for (int i = 0; i < sz; i++) {
            array[i] = cs.charAt(i);
        }
        return array;
    }

    /**
     * Green implementation of regionMatches.
     *
     * <p>When both arguments are {@code String}s the call is delegated to
     * {@link String#regionMatches(boolean, int, String, int, int)}; otherwise
     * the regions are compared one {@code char} at a time using the same
     * case-folding rule as {@code String}.</p>
     *
     * @param cs  the {@code CharSequence} to be processed
     * @param ignoreCase  whether or not to be case insensitive
     * @param thisStart  the index to start on the {@code cs} CharSequence
     * @param substring  the {@code CharSequence} to be looked for
     * @param start  the index to start on the {@code substring} CharSequence
     * @param length  character length of the region
     * @return whether the region matched
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length) {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }
        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;

        // Extract these first so we detect NPEs the same as the java.lang.String version
        final int srcLen = cs.length() - thisStart;
        final int otherLen = substring.length() - start;

        // Check for invalid parameters
        if (thisStart < 0 || start < 0 || length < 0) {
            return false;
        }

        // Check that the regions are long enough
        if (srcLen < length || otherLen < length) {
            return false;
        }

        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);

            if (c1 == c2) {
                continue;
            }

            if (!ignoreCase) {
                return false;
            }

            // The same check as in String.regionMatches(): upper-case both chars,
            // then lower-case the upper-cased values. Lower-casing the original
            // chars instead gives a different answer for some pairs, e.g.
            // U+0131 (dotless i) versus U+0130 (capital I with dot above).
            final char u1 = Character.toUpperCase(c1);
            final char u2 = Character.toUpperCase(c2);
            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
                return false;
            }
        }

        return true;
    }
}