Copyright (c) 2000, 2010 IBM Corporation and others.
This program and the accompanying materials
are made available under the terms of the Eclipse Public License 2.0
which accompanies this distribution, and is available at
https://www.eclipse.org/legal/epl-2.0/
SPDX-License-Identifier: EPL-2.0
Contributors:
IBM Corporation - initial API and implementation
/*******************************************************************************
* Copyright (c) 2000, 2010 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.jdt.internal.core.search;
import org.eclipse.jdt.core.compiler.CharOperation;
import org.eclipse.jdt.internal.compiler.parser.ScannerHelper;
This class is a collection of helper methods to manipulate strings during search.
/**
* This class is a collection of helper methods to manipulate strings during search.
*/
public final class StringOperation {
private final static int[] EMPTY_REGIONS = new int[0];
Answers all the regions in a given name matching a given camel case pattern.
Each of these regions is made of its starting index and its length in the given
name. They are all concatenated in a single array of int
which therefore always has an even length.
Note that each region is disjointed from the following one.
E.g. if the regions are { start1, length1, start2, length2 }
,
then start1+length1
will always be smaller than
start2
.
Examples:
- pattern = "NPE"
name = NullPointerException / NoPermissionException
result: { 0, 1, 4, 1, 11, 1 } / { 0, 1, 2, 1, 12, 1 }
- pattern = "NuPoEx"
name = NullPointerException
result: { 0, 2, 4, 2, 11, 2 }
- pattern = "IPL3"
name = "IPerspectiveListener3"
result: { 0, 2, 12, 1, 20, 1 }
- pattern = "HashME"
name = "HashMapEntry"
result: { 0, 5, 7, 1 }
Params: - pattern – the given pattern
- patternStart – the start index of the pattern, inclusive
- patternEnd – the end index of the pattern, exclusive
- name – the given name
- nameStart – the start index of the name, inclusive
- nameEnd – the end index of the name, exclusive
- samePartCount – flag telling whether the pattern and the name should
have the same count of parts or not.
For example:
- 'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types,
but not 'HashMapEntry'
- 'HMap' type string pattern will still match previous 'HashMap' and
'HtmlMapper' types, but not 'HighMagnitude'
See Also: -
for more details on the camel case behavior
- for more details on the
pattern match behavior
Returns: an array of int
having two slots per returned
regions (first one is the starting index of the region and the second
one the length of the region).
Note that it may be null
if the given name does not match
the pattern Since: 3.5
/**
* Answers all the regions in a given name matching a given camel case pattern.
* </p><p>
* Each of these regions is made of its starting index and its length in the given
* name. They are all concatenated in a single array of <code>int</code>
* which therefore always has an even length.
* </p><p>
* Note that each region is disjointed from the following one.<br>
* E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
* then <code>start1+length1</code> will always be smaller than
* <code>start2</code>.
* </p><p>
* <pre>
* Examples:
* <ol><li> pattern = "NPE"
* name = NullPointerException / NoPermissionException
* result: { 0, 1, 4, 1, 11, 1 } / { 0, 1, 2, 1, 12, 1 } </li>
* <li> pattern = "NuPoEx"
* name = NullPointerException
* result: { 0, 2, 4, 2, 11, 2 }</li>
* <li> pattern = "IPL3"
* name = "IPerspectiveListener3"
* result: { 0, 2, 12, 1, 20, 1 }</li>
* <li> pattern = "HashME"
* name = "HashMapEntry"
* result: { 0, 5, 7, 1 }</li>
* </ol></pre>
*</p>
* @see CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean)
* for more details on the camel case behavior
* @see CharOperation#match(char[], char[], boolean) for more details on the
* pattern match behavior
*
* @param pattern the given pattern
* @param patternStart the start index of the pattern, inclusive
* @param patternEnd the end index of the pattern, exclusive
* @param name the given name
* @param nameStart the start index of the name, inclusive
* @param nameEnd the end index of the name, exclusive
* @param samePartCount flag telling whether the pattern and the name should
* have the same count of parts or not.<br>
* For example:
* <ul>
* <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types,
* but not 'HashMapEntry'</li>
* <li>'HMap' type string pattern will still match previous 'HashMap' and
* 'HtmlMapper' types, but not 'HighMagnitude'</li>
* </ul>
* @return an array of <code>int</code> having two slots per returned
* regions (first one is the starting index of the region and the second
* one the length of the region).<br>
* Note that it may be <code>null</code> if the given name does not match
* the pattern
* @since 3.5
*/
public static final int[] getCamelCaseMatchingRegions(String pattern, int patternStart, int patternEnd, String name, int nameStart, int nameEnd, boolean samePartCount) {
/* !!!!!!!!!! WARNING !!!!!!!!!!
* The algorithm used in this method has been fully inspired from
* CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean).
*
* So, if any change needs to be applied in the algorithm, do NOT forget
* to backport it in the CharOperation method!
*/
if (name == null)
return null; // null name cannot match
if (pattern == null) {
// null pattern cannot match any region
// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
return EMPTY_REGIONS;
}
if (patternEnd < 0) patternEnd = pattern.length();
if (nameEnd < 0) nameEnd = name.length();
if (patternEnd <= patternStart) {
return nameEnd <= nameStart
? new int[] { patternStart, patternEnd-patternStart }
: null;
}
if (nameEnd <= nameStart) return null;
// check first pattern char
if (name.charAt(nameStart) != pattern.charAt(patternStart)) {
// first char must strictly match (upper/lower)
return null;
}
char patternChar, nameChar;
int iPattern = patternStart;
int iName = nameStart;
// init segments
int parts = 1;
for (int i=patternStart+1; i<patternEnd; i++) {
final char ch = pattern.charAt(i);
if (ch < ScannerHelper.MAX_OBVIOUS) {
if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[ch] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) != 0) {
parts++;
}
} else if (Character.isJavaIdentifierPart(ch) && (Character.isUpperCase(ch) || Character.isDigit(ch))) {
parts++;
}
}
int[] segments = null;
int count = 0; // count
// Main loop is on pattern characters
int segmentStart = iName;
while (true) {
iPattern++;
iName++;
if (iPattern == patternEnd) { // we have exhausted pattern...
// it's a match if the name can have additional parts (i.e. uppercase characters) or is also exhausted
if (!samePartCount || iName == nameEnd) {
if (segments == null) {
segments = new int[2];
}
segments[count++] = segmentStart;
segments[count++] = iName - segmentStart;
if (count < segments.length) {
System.arraycopy(segments, 0, segments = new int[count], 0, count);
}
return segments;
}
// otherwise it's a match only if the name has no more uppercase characters
int segmentEnd = iName;
while (true) {
if (iName == nameEnd) {
// we have exhausted the name, so it's a match
if (segments == null) {
segments = new int[2];
}
segments[count++] = segmentStart;
segments[count++] = segmentEnd - segmentStart;
if (count < segments.length) {
System.arraycopy(segments, 0, segments = new int[count], 0, count);
}
return segments;
}
nameChar = name.charAt(iName);
// test if the name character is uppercase
if (nameChar < ScannerHelper.MAX_OBVIOUS) {
if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar] & ScannerHelper.C_UPPER_LETTER) != 0) {
return null;
}
}
else if (!Character.isJavaIdentifierPart(nameChar) || Character.isUpperCase(nameChar)) {
return null;
}
iName++;
}
}
if (iName == nameEnd){
// We have exhausted the name (and not the pattern), so it's not a match
return null;
}
// For as long as we're exactly matching, bring it on (even if it's a lower case character)
if ((patternChar = pattern.charAt(iPattern)) == name.charAt(iName)) {
continue;
}
int segmentEnd = iName;
// If characters are not equals, then it's not a match if patternChar is lowercase
if (patternChar < ScannerHelper.MAX_OBVIOUS) {
if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[patternChar] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) == 0) {
return null;
}
} else if (Character.isJavaIdentifierPart(patternChar) && !Character.isUpperCase(patternChar) && !Character.isDigit(patternChar)) {
return null;
}
// patternChar is uppercase, so let's find the next uppercase in name
while (true) {
if (iName == nameEnd){
// We have exhausted name (and not pattern), so it's not a match
return null;
}
nameChar = name.charAt(iName);
if (nameChar < ScannerHelper.MAX_OBVIOUS) {
int charNature = ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar];
if ((charNature & (ScannerHelper.C_LOWER_LETTER | ScannerHelper.C_SPECIAL)) != 0) {
// nameChar is lowercase
iName++;
} else if ((charNature & ScannerHelper.C_DIGIT) != 0) {
// nameChar is digit => break if the digit is current pattern character otherwise consume it
if (patternChar == nameChar) break;
iName++;
// nameChar is uppercase...
} else if (patternChar != nameChar) {
//.. and it does not match patternChar, so it's not a match
return null;
} else {
//.. and it matched patternChar. Back to the big loop
break;
}
}
// Same tests for non-obvious characters
else if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar)) {
iName++;
} else if (Character.isDigit(nameChar)) {
if (patternChar == nameChar) break;
iName++;
} else if (patternChar != nameChar) {
return null;
} else {
break;
}
}
// At this point, either name has been exhausted, or it is at an uppercase letter.
// Since pattern is also at an uppercase letter
if (segments == null) {
segments = new int[parts*2];
}
segments[count++] = segmentStart;
segments[count++] = segmentEnd - segmentStart;
segmentStart = iName;
}
}
Answers all the regions in a given name matching a given pattern
pattern (e.g. "H*M??").
Each of these regions is made of its starting index and its length in the given
name. They are all concatenated in a single array of int
which therefore always has an even length.
Note that each region is disjointed from the following one.
E.g. if the regions are { start1, length1, start2, length2 }
,
then start1+length1
will always be smaller than
start2
.
Examples:
- pattern = "N???Po*Ex?eption"
name = NullPointerException
result: { 0, 1, 4, 2, 11, 2, 14, 6 }
- pattern = "Ha*M*ent*"
name = "HashMapEntry"
result: { 0, 2, 4, 1, 7, 3 }
Params: - pattern – the given pattern
- patternStart – the given pattern start
- patternEnd – the given pattern end
- name – the given name
- nameStart – the given name start
- nameEnd – the given name end
- isCaseSensitive – flag to know if the matching should be case sensitive
See Also: - for more details on the
pattern match behavior
Returns: an array of int
having two slots per returned
regions (first one is the starting index of the region and the second
one the length of the region).
Note that it may be null
if the given name does not match
the pattern Since: 3.5
/**
* Answers all the regions in a given name matching a given <i>pattern</i>
* pattern (e.g. "H*M??").
* </p><p>
* Each of these regions is made of its starting index and its length in the given
* name. They are all concatenated in a single array of <code>int</code>
* which therefore always has an even length.
* </p><p>
* Note that each region is disjointed from the following one.<br>
* E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
* then <code>start1+length1</code> will always be smaller than
* <code>start2</code>.
* </p><p>
* <pre>
* Examples:
* <ol>
* <li> pattern = "N???Po*Ex?eption"
* name = NullPointerException
* result: { 0, 1, 4, 2, 11, 2, 14, 6 }</li>
* <li> pattern = "Ha*M*ent*"
* name = "HashMapEntry"
* result: { 0, 2, 4, 1, 7, 3 }</li>
* </ol></pre>
*</p>
* @see CharOperation#match(char[], char[], boolean) for more details on the
* pattern match behavior
*
* @param pattern the given pattern
* @param patternStart the given pattern start
* @param patternEnd the given pattern end
* @param name the given name
* @param nameStart the given name start
* @param nameEnd the given name end
* @param isCaseSensitive flag to know if the matching should be case sensitive
* @return an array of <code>int</code> having two slots per returned
* regions (first one is the starting index of the region and the second
* one the length of the region).<br>
* Note that it may be <code>null</code> if the given name does not match
* the pattern
* @since 3.5
*/
public static final int[] getPatternMatchingRegions(
String pattern,
int patternStart,
int patternEnd,
String name,
int nameStart,
int nameEnd,
boolean isCaseSensitive) {
/* !!!!!!!!!! WARNING !!!!!!!!!!
* The algorithm used in this method has been fully inspired from
* CharOperation#match(char[], int, int, char[], int, int, boolean).
*
* So, if any change needs to be applied in the algorithm, do NOT forget
* to backport it in the CharOperation method!
*/
if (name == null) return null; // null name cannot match
if (pattern == null) {
// null pattern cannot match any region
// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
return EMPTY_REGIONS;
}
int iPattern = patternStart;
int iName = nameStart;
// init segments parts
if (patternEnd < 0)
patternEnd = pattern.length();
if (nameEnd < 0)
nameEnd = name.length();
int questions = 0;
int parts = 0;
char previous = 0;
for (int i=patternStart; i<patternEnd; i++) {
char ch = pattern.charAt(i);
switch (ch) {
case '?':
questions++;
break;
case '*':
break;
default:
switch (previous) {
case 0 :
case '?':
case '*':
parts++;
break;
}
}
previous = ch;
}
if (parts == 0) {
if (questions <= (nameEnd - nameStart)) return EMPTY_REGIONS;
return null;
}
int[] segments = new int[parts*2];
/* check first segment */
int count = 0;
int start = iName;
char patternChar = 0;
previous = 0;
while ((iPattern < patternEnd)
&& (patternChar = pattern.charAt(iPattern)) != '*') {
if (iName == nameEnd)
return null;
if (patternChar == '?') {
switch (previous) {
case 0:
case '?':
break;
default:
segments[count++] = start;
segments[count++] = iPattern-start;
break;
}
} else {
if (isCaseSensitive) {
if (patternChar != name.charAt(iName)) {
return null;
}
} else if (ScannerHelper.toLowerCase(patternChar) != ScannerHelper.toLowerCase(name.charAt(iName))) {
return null;
}
switch (previous) {
case 0:
case '?':
start = iPattern;
break;
}
}
iName++;
iPattern++;
previous = patternChar;
}
/* check sequence of star+segment */
int segmentStart;
if (patternChar == '*') {
if (iPattern > 0 && previous != '?') {
segments[count++] = start;
segments[count++] = iName-start;
start = iName;
}
segmentStart = ++iPattern; // skip star
} else {
if (iName == nameEnd) {
if (count == (parts*2)) return segments;
int end = patternEnd;
if (previous == '?') { // last char was a '?' => purge all trailing '?'
while (pattern.charAt(--end-1) == '?') {
if (end == start) {
return new int[] { patternStart, patternEnd-patternStart };
}
}
}
return new int[] { start, end-start };
}
return null;
}
int prefixStart = iName;
int previousCount = count;
previous = patternChar;
char previousSegment = patternChar;
checkSegment : while (iName < nameEnd) {
if (iPattern == patternEnd) {
iPattern = segmentStart; // mismatch - restart current segment
iName = ++prefixStart;
previous = previousSegment;
continue checkSegment;
}
/* segment is ending */
if ((patternChar = pattern.charAt(iPattern)) == '*') {
segmentStart = ++iPattern; // skip star
if (segmentStart == patternEnd) {
if (count < (parts*2)) {
segments[count++] = start;
segments[count++] = iName-start;
}
return segments;
}
switch (previous) {
case '*':
case '?':
break;
default:
segments[count++] = start;
segments[count++] = iName-start;
break;
}
prefixStart = iName;
start = prefixStart;
previous = patternChar;
previousSegment = patternChar;
continue checkSegment;
}
/* check current name character */
previousCount = count;
if (patternChar == '?') {
switch (previous) {
case '*':
case '?':
break;
default:
segments[count++] = start;
segments[count++] = iName-start;
break;
}
} else {
boolean mismatch;
if (isCaseSensitive) {
mismatch = name.charAt(iName) != patternChar;
} else {
mismatch = ScannerHelper.toLowerCase(name.charAt(iName)) != ScannerHelper.toLowerCase(patternChar);
}
if (mismatch) {
iPattern = segmentStart; // mismatch - restart current segment
iName = ++prefixStart;
start = prefixStart;
count = previousCount;
previous = previousSegment;
continue checkSegment;
}
switch (previous) {
case '?':
start = iName;
break;
}
}
iName++;
iPattern++;
previous = patternChar;
}
if ((segmentStart == patternEnd)
|| (iName == nameEnd && iPattern == patternEnd)
|| (iPattern == patternEnd - 1 && pattern.charAt(iPattern) == '*')) {
if (count < (parts*2)) {
segments[count++] = start;
segments[count++] = iName-start;
}
return segments;
}
return null;
}
}