Copyright (c) 2000, 2011 IBM Corporation and others. This program and the accompanying materials are made available under the terms of the Eclipse Public License 2.0 which accompanies this distribution, and is available at https://www.eclipse.org/legal/epl-2.0/ SPDX-License-Identifier: EPL-2.0 Contributors: IBM Corporation - initial API and implementation
/******************************************************************************* * Copyright (c) 2000, 2011 IBM Corporation and others. * * This program and the accompanying materials * are made available under the terms of the Eclipse Public License 2.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/legal/epl-2.0/ * * SPDX-License-Identifier: EPL-2.0 * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/
package org.eclipse.jdt.internal.ui.text; import java.text.CharacterIterator; import com.ibm.icu.text.BreakIterator; import org.eclipse.core.runtime.Assert;
A java break iterator. It returns all breaks, including before and after whitespace, and it returns all camel case breaks.

A line break may be any of "\n", "\r", "\r\n", "\n\r".

Since:3.0
/** * A java break iterator. It returns all breaks, including before and after * whitespace, and it returns all camel case breaks. * <p> * A line break may be any of "\n", "\r", "\r\n", "\n\r". * </p> * * @since 3.0 */
public class JavaBreakIterator extends BreakIterator {
A run of common characters.
/** * A run of common characters. */
protected static abstract class Run {
The length of this run.
/** The length of this run. */
protected int length; public Run() { init(); }
Returns true if this run consumes ch, false otherwise. If true is returned, the length of the receiver is adjusted accordingly.
Params:
  • ch – the character to test
Returns:true if ch was consumed
/** * Returns <code>true</code> if this run consumes <code>ch</code>, * <code>false</code> otherwise. If <code>true</code> is returned, * the length of the receiver is adjusted accordingly. * * @param ch the character to test * @return <code>true</code> if <code>ch</code> was consumed */
protected boolean consume(char ch) { if (isValid(ch)) { length++; return true; } return false; }
Whether this run accepts that character; does not update state. Called from the default implementation of consume.
Params:
  • ch – the character to test
Returns:true if ch is accepted
/** * Whether this run accepts that character; does not update state. Called * from the default implementation of <code>consume</code>. * * @param ch the character to test * @return <code>true</code> if <code>ch</code> is accepted */
protected abstract boolean isValid(char ch);
Resets this run to the initial state.
/** * Resets this run to the initial state. */
protected void init() { length= 0; } } static final class Whitespace extends Run { @Override protected boolean isValid(char ch) { return Character.isWhitespace(ch) && ch != '\n' && ch != '\r'; } } static final class LineDelimiter extends Run {
State: INIT -> delimiter -> EXIT.
/** State: INIT -> delimiter -> EXIT. */
private char fState; private static final char INIT= '\0'; private static final char EXIT= '\1'; /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init() */ @Override protected void init() { super.init(); fState= INIT; } /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consume(char) */ @Override protected boolean consume(char ch) { if (!isValid(ch) || fState == EXIT) return false; if (fState == INIT) { fState= ch; length++; return true; } else if (fState != ch) { fState= EXIT; length++; return true; } else { return false; } } @Override protected boolean isValid(char ch) { return ch == '\n' || ch == '\r'; } } static final class Identifier extends Run { /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) */ @Override protected boolean isValid(char ch) { return Character.isJavaIdentifierPart(ch); } } static final class CamelCaseIdentifier extends Run { /* states */ private static final int S_INIT= 0; private static final int S_LOWER= 1; private static final int S_ONE_CAP= 2; private static final int S_ALL_CAPS= 3; private static final int S_EXIT= 4; private static final int S_EXIT_MINUS_ONE= 5; /* character types */ private static final int K_INVALID= 0; private static final int K_LOWER= 1; private static final int K_UPPER= 2; private static final int K_OTHER= 3; private int fState; private final static int[][] MATRIX= new int[][] { // K_INVALID, K_LOWER, K_UPPER, K_OTHER { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS }; /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init() */ @Override protected void init() { super.init(); fState= S_INIT; } /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consumes(char) */ @Override protected boolean consume(char ch) { int kind= getKind(ch); fState= MATRIX[fState][kind]; switch (fState) { case S_LOWER: case S_ONE_CAP: case S_ALL_CAPS: length++; return true; case S_EXIT: return false; case S_EXIT_MINUS_ONE: length--; return false; default: Assert.isTrue(false); return false; } }
Determines the kind of a character.
Params:
  • ch – the character to test
/** * Determines the kind of a character. * * @param ch the character to test */
private int getKind(char ch) { if (Character.isUpperCase(ch)) return K_UPPER; if (Character.isLowerCase(ch)) return K_LOWER; if (Character.isJavaIdentifierPart(ch)) // _, digits... return K_OTHER; return K_INVALID; } /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) */ @Override protected boolean isValid(char ch) { return Character.isJavaIdentifierPart(ch); } } static final class Other extends Run { /* * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) */ @Override protected boolean isValid(char ch) { return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch); } } private static final Run WHITESPACE= new Whitespace(); private static final Run DELIMITER= new LineDelimiter(); private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier(); private static final Run OTHER= new Other();
The platform break iterator (word instance) used as a base.
/** The platform break iterator (word instance) used as a base. */
protected final BreakIterator fIterator;
The text we operate on.
/** The text we operate on. */
protected CharSequence fText;
our current position for the stateful methods.
/** our current position for the stateful methods. */
private int fIndex;
Creates a new break iterator.
/** * Creates a new break iterator. */
public JavaBreakIterator() { fIterator= BreakIterator.getWordInstance(); fIndex= fIterator.current(); } /* * @see java.text.BreakIterator#current() */ @Override public int current() { return fIndex; } /* * @see java.text.BreakIterator#first() */ @Override public int first() { fIndex= fIterator.first(); return fIndex; } /* * @see java.text.BreakIterator#following(int) */ @Override public int following(int offset) { // work around too eager IAEs in standard implementation if (offset == getText().getEndIndex()) return DONE; int next= fIterator.following(offset); if (next == DONE) return DONE; // TODO deal with complex script word boundaries // Math.min(offset + run.length, next) does not work // since BreakIterator.getWordInstance considers _ as boundaries // seems to work fine, however Run run= consumeRun(offset); return offset + run.length; }
Consumes a run of characters at the limits of which we introduce a break.
Params:
  • offset – the offset to start at
Returns:the run that was consumed
/** * Consumes a run of characters at the limits of which we introduce a break. * @param offset the offset to start at * @return the run that was consumed */
private Run consumeRun(int offset) { // assert offset < length char ch= fText.charAt(offset); int length= fText.length(); Run run= getRun(ch); while (run.consume(ch) && offset < length - 1) { offset++; ch= fText.charAt(offset); } return run; }
Returns a run based on a character.
Params:
  • ch – the character to test
Returns:the correct character given ch
/** * Returns a run based on a character. * * @param ch the character to test * @return the correct character given <code>ch</code> */
private Run getRun(char ch) { Run run; if (WHITESPACE.isValid(ch)) run= WHITESPACE; else if (DELIMITER.isValid(ch)) run= DELIMITER; else if (CAMELCASE.isValid(ch)) run= CAMELCASE; else if (OTHER.isValid(ch)) run= OTHER; else { Assert.isTrue(false); return null; } run.init(); return run; } /* * @see java.text.BreakIterator#getText() */ @Override public CharacterIterator getText() { return fIterator.getText(); } /* * @see java.text.BreakIterator#isBoundary(int) */ @Override public boolean isBoundary(int offset) { if (offset == getText().getBeginIndex()) return true; else return following(offset - 1) == offset; } /* * @see java.text.BreakIterator#last() */ @Override public int last() { fIndex= fIterator.last(); return fIndex; } /* * @see java.text.BreakIterator#next() */ @Override public int next() { fIndex= following(fIndex); return fIndex; } /* * @see java.text.BreakIterator#next(int) */ @Override public int next(int n) { return fIterator.next(n); } /* * @see java.text.BreakIterator#preceding(int) */ @Override public int preceding(int offset) { if (offset == getText().getBeginIndex()) return DONE; if (isBoundary(offset - 1)) return offset - 1; int previous= offset - 1; do { previous= fIterator.preceding(previous); } while (!isBoundary(previous)); int last= DONE; while (previous < offset) { last= previous; previous= following(previous); } return last; } /* * @see java.text.BreakIterator#previous() */ @Override public int previous() { fIndex= preceding(fIndex); return fIndex; } /* * @see java.text.BreakIterator#setText(java.lang.String) */ @Override public void setText(String newText) { setText((CharSequence) newText); }
Creates a break iterator given a char sequence.
Params:
  • newText – the new text
/** * Creates a break iterator given a char sequence. * @param newText the new text */
public void setText(CharSequence newText) { fText= newText; fIterator.setText(new SequenceCharacterIterator(newText)); first(); } /* * @see java.text.BreakIterator#setText(java.text.CharacterIterator) */ @Override public void setText(CharacterIterator newText) { if (newText instanceof CharSequence) { fText= (CharSequence) newText; fIterator.setText(newText); first(); } else { throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$ } } }