/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.8.2                                                             *
 * Copyright (C) 1998-2018  Gerwin Klein <lsf@jflex.de>                    *
 * All rights reserved.                                                    *
 *                                                                         *
 * License: BSD                                                            *
 *                                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.core.unicode;

import static java.lang.Math.max;
import static java.lang.Math.min;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.PrimitiveIterator;
import jflex.chars.Interval;
import jflex.chars.Interval.IntervalIterator;
import jflex.logging.Out;

Mutable Char Set implemented with intervals.
Author:Gerwin Klein, Régis Décamps
Version:JFlex 1.8.2
/** * Mutable Char Set implemented with intervals. * * @author Gerwin Klein * @author Régis Décamps * @version JFlex 1.8.2 */
public final class IntCharSet implements Iterable<Integer> { private static final boolean DEBUG = false; /* invariant: all intervals are disjoint, ordered */ private final List<Interval> intervals = new ArrayList<>();
Creates a charset that contains only one interval.
/** Creates a charset that contains only one interval. */
public static IntCharSet of(Interval interval) { IntCharSet charset = new IntCharSet(); charset.intervals.add(interval); if (DEBUG) assert charset.invariants(); return charset; }
Creates a charset that contains the given intervals.
Params:
  • intervals – the intervals the new set should contains.
Returns:a new IntCharSet with the given intervals.
/** * Creates a charset that contains the given intervals. * * @param intervals the intervals the new set should contains. * @return a new IntCharSet with the given intervals. */
public static IntCharSet of(Interval... intervals) { IntCharSet charset = new IntCharSet(); for (Interval i : intervals) { charset.add(i); } return charset; }
Creates a charset that contains only one interval, given by its start and end values.
/** * Creates a charset that contains only one interval, given by its {@code start} and {@code end} * values. */
public static IntCharSet ofCharacterRange(int start, int end) { return of(new Interval(start, end)); }
Creates a char set that contains only the given character.
/** Creates a char set that contains only the given character. */
public static IntCharSet ofCharacter(int singleChar) { return of(Interval.ofCharacter(singleChar)); }
Creates the set of all characters.
Returns:a new IntCharSet that contains all characters.
/** * Creates the set of all characters. * * @return a new IntCharSet that contains all characters. */
public static IntCharSet allChars() { return IntCharSet.ofCharacterRange(0, CharClasses.maxChar); }
The set of new-line characters.
Returns:a new IntCharSet that contains all characters that are considered a new-line char in Java.
/** * The set of new-line characters. * * @return a new IntCharSet that contains all characters that are considered a new-line char in * Java. */
public static IntCharSet nlChars() { IntCharSet set = new IntCharSet(); set.intervals.add(new Interval('\n', '\r')); set.intervals.add(Interval.ofCharacter('\u0085')); set.intervals.add(new Interval('\u2028', '\u2029')); return set; }
Returns the index of the interval that contains the character c.

Binary search which interval contains the given character.

Params:
  • c – the character to search for
Returns:the index of the enclosing interval, or -1 if no such interval
/** * Returns the index of the interval that contains the character {@code c}. * * <p>Binary search which interval contains the given character. * * @param c the character to search for * @return the index of the enclosing interval, or -1 if no such interval */
private int indexOf(int c) { int start = 0; int end = intervals.size() - 1; while (start <= end) { int check = (start + end) / 2; Interval i = intervals.get(check); if (start == end) { return i.contains(c) ? start : -1; } if (c < i.start) { end = check - 1; continue; } if (c > i.end) { start = check + 1; continue; } if (DEBUG) assert intervals.get(check).contains(c); return check; } if (DEBUG) { for (Interval i : intervals) assert (!i.contains(c)); } return -1; }
Merges the given set into this one.
/** Merges the given set into this one. */
@SuppressWarnings("ReferenceEquality") public void add(IntCharSet set) { if (DEBUG) { assert invariants(); assert set.invariants(); assert this != set; // reference comparison intended, must supply different object } for (Interval interval : set.intervals) { add(interval); } }
Adds a single interval to this IntCharSet.
Params:
/** * Adds a single interval to this IntCharSet. * * @param interval a {@link jflex.chars.Interval} object. */
@SuppressWarnings("IncrementInForLoopAndHeader") public void add(Interval interval) { if (DEBUG) assert interval.invariants(); int size = intervals.size(); for (int i = 0; i < size; i++) { Interval elem = intervals.get(i); if (elem.end + 1 >= interval.start) { if (elem.contains(interval)) { if (DEBUG) assert invariants(); return; } if (elem.start > interval.end + 1) { intervals.add(i, Interval.copyOf(interval)); if (DEBUG) assert invariants(); return; } if (interval.start < elem.start) { elem.start = interval.start; } if (interval.end <= elem.end) { if (DEBUG) assert invariants(); return; } elem.end = interval.end; i++; // delete all x with x.contains( interval.end ) while (i < size) { Interval x = intervals.get(i); if (x.start > elem.end + 1) { if (DEBUG) assert invariants(); return; } if (x.end > elem.end) { elem.end = x.end; } intervals.remove(i); size--; } if (DEBUG) assert invariants(); return; } } intervals.add(Interval.copyOf(interval)); if (DEBUG) assert invariants(); }
Adds a single character.
Params:
  • c – Character to add.
/** * Adds a single character. * * @param c Character to add. */
public void add(int c) { int size = intervals.size(); for (int i = 0; i < size; i++) { Interval elem = intervals.get(i); if (elem.end + 1 < c) continue; if (elem.contains(c)) return; // already there, nothing to do if (DEBUG) assert (elem.end + 1 >= c && (elem.start > c || elem.end < c)); if (elem.start > c + 1) { intervals.add(i, Interval.ofCharacter(c)); if (DEBUG) assert invariants(); return; } if (DEBUG) assert (elem.end + 1 >= c && elem.start <= c + 1 && (elem.start > c || elem.end < c)); if (c + 1 == elem.start) { elem.start = c; if (DEBUG) assert invariants(); return; } if (DEBUG) assert (elem.end + 1 == c); elem.end = c; // merge with next interval if it contains c if (i + 1 >= size) { if (DEBUG) assert invariants(); return; } Interval x = intervals.get(i + 1); if (x.start <= c + 1) { elem.end = x.end; intervals.remove(i + 1); } if (DEBUG) assert invariants(); return; } // end reached but nothing found -> append at end intervals.add(Interval.ofCharacter(c)); if (DEBUG) assert invariants(); }
Returns whether this set contains a given character.
Params:
  • singleChar – a single character (int).
Returns:true iff singleChar is contained in the set.
/** * Returns whether this set contains a given character. * * @param singleChar a single character (int). * @return true iff singleChar is contained in the set. */
public boolean contains(int singleChar) { return indexOf(singleChar) >= 0; }
Check whether this set contains a another set.
Params:
  • other – an IntCharSet.
Returns:true iff all characters of other are contained in this set.
/** * Check whether this set contains a another set. * * @param other an IntCharSet. * @return true iff all characters of {@code other} are contained in this set. */
public boolean contains(IntCharSet other) { // treat null as empty set if (other == null) { return true; } IntCharSet set = IntCharSet.copyOf(other); IntCharSet inter = this.and(other); set.sub(inter); return !set.containsElements(); } @Override public boolean equals(Object o) { if (!(o instanceof IntCharSet)) { return false; } IntCharSet set = (IntCharSet) o; return Objects.equals(intervals, set.intervals); } @Override public int hashCode() { int h = 1; for (Interval interval : intervals) { h *= 1000003; h ^= interval.hashCode(); } return h; }
Intersects two sets.
Params:
Returns:the IntCharSet common to the two sets.
/** * Intersects two sets. * * @param set a {@link IntCharSet} object. * @return the {@link IntCharSet} common to the two sets. */
public IntCharSet and(IntCharSet set) { if (DEBUG) { Out.dump("intersection"); Out.dump("this : " + this); Out.dump("other : " + set); } IntCharSet result = new IntCharSet(); int i = 0; // index in this.intervals int j = 0; // index in set.intervals int size = intervals.size(); int setSize = set.intervals.size(); while (i < size && j < setSize) { Interval x = this.intervals.get(i); Interval y = set.intervals.get(j); if (x.end < y.start) { i++; continue; } if (y.end < x.start) { j++; continue; } result.intervals.add(new Interval(max(x.start, y.start), min(x.end, y.end))); if (x.end >= y.end) j++; if (y.end >= x.end) i++; } if (DEBUG) { Out.dump("result: " + result); assert result.invariants(); } return result; }
Returns the relative complement of this set relative to the provided set.

Assumes that set is non-null, and contained in this IntCharSet.

Params:
/** * Returns the relative complement of this set relative to the provided set. * * <p>Assumes that {@code set} is non-null, and contained in this IntCharSet. * * @param set a {@link IntCharSet} to substract from this set. */
@SuppressWarnings("ReferenceEquality") public void sub(IntCharSet set) { if (DEBUG) { Out.dump("complement"); Out.dump("this : " + this); Out.dump("other : " + set); assert invariants(); // not asserting non-null, because we'll already get an exception and it confuses lgtm.com assert set.invariants(); assert isSubSet(set, this); assert set != this; // reference comparison intended, must supply different object } int i = 0; // index in this.intervals int j = 0; // index in set.intervals int setSize = set.intervals.size(); while (i < intervals.size() && j < setSize) { Interval x = this.intervals.get(i); Interval y = set.intervals.get(j); if (DEBUG) { Out.dump("this : " + this); Out.dump("this [" + i + "] : " + x); Out.dump("other [" + j + "] : " + y); } if (x.end < y.start) { i++; continue; } if (y.end < x.start) { j++; continue; } // x.end >= y.start && y.end >= x.start -> // x.end <= y.end && x.start >= y.start (prec) if (x.start == y.start && x.end == y.end) { intervals.remove(i); j++; continue; } // x.end <= y.end && x.start >= y.start && // (x.end < y.end || x.start > y.start) -> // x.start < x.end if (x.start == y.start) { x.start = y.end + 1; j++; continue; } if (x.end == y.end) { x.end = y.start - 1; i++; j++; continue; } intervals.add(i, new Interval(x.start, y.start - 1)); x.start = y.end + 1; i++; j++; } if (DEBUG) { Out.dump("result: " + this); assert invariants(); } }
Returns the complement of the specified set x, that is, the set of all elements that are not contained in x.
Params:
Returns:the complement of x
/** * Returns the complement of the specified set x, that is, the set of all elements that are not * contained in x. * * @param x the {@link IntCharSet} to take the complement of. * @return the complement of x */
public static IntCharSet complementOf(IntCharSet x) { IntCharSet result = allChars(); if (x != null) { result.sub(x); } return result; }
Returns whether the set contains elements.
Returns:Whether the set is non-empty.
/** * Returns whether the set contains elements. * * @return Whether the set is non-empty. */
public boolean containsElements() { return intervals.size() > 0; }
Returns the number of intervals.
Returns:number of intervals.
/** * Returns the number of intervals. * * @return number of intervals. */
public int numIntervals() { return intervals.size(); }
Returns the intervals.
Returns:a List object.
/** * Returns the intervals. * * @return a {@link java.util.List} object. */
public List<Interval> getIntervals() { return intervals; }
Returns:an iterator over the intervals in this set
/** @return an iterator over the intervals in this set */
public Iterator<Interval> intervalIterator() { return intervals.iterator(); }
Create a caseless version of this charset.

The caseless version contains all characters of this char set, and additionally all lower/upper/title case variants of the characters in this set.

Params:
  • unicodeProperties – The Unicode Properties to use when generating caseless equivalence classes.
Returns:a caseless copy of this set
/** * Create a caseless version of this charset. * * <p>The caseless version contains all characters of this char set, and additionally all * lower/upper/title case variants of the characters in this set. * * @param unicodeProperties The Unicode Properties to use when generating caseless equivalence * classes. * @return a caseless copy of this set */
public IntCharSet getCaseless(UnicodeProperties unicodeProperties) { IntCharSet n = copyOf(this); for (Interval elem : intervals) { for (int c = elem.start; c <= elem.end; c++) { IntCharSet equivalenceClass = unicodeProperties.getCaselessMatches(c); if (null != equivalenceClass) { n.add(equivalenceClass); } } } return n; } @Override public String toString() { StringBuilder result = new StringBuilder("{ "); for (Interval interval : intervals) { result.append(interval); } result.append(" }"); return result.toString(); }
Creates a IntCharSet from an existing IntCharSet.
Returns:a (deep) copy of the char set.
/** * Creates a IntCharSet from an existing IntCharSet. * * @return a (deep) copy of the char set. */
public static IntCharSet copyOf(IntCharSet intCharSet) { IntCharSet result = new IntCharSet(); for (Interval interval : intCharSet.intervals) { result.intervals.add(Interval.copyOf(interval)); } if (DEBUG) assert result.invariants(); return result; }
Computes the size of this set.
Returns:how many elements are contained in this set
/** * Computes the size of this set. * * @return how many elements are contained in this set */
public int size() { int charCount = 0; for (Interval i : intervals) charCount += i.size(); return charCount; }
Checks the invariants of this object.
Returns:true when the invariants of this objects hold.
/** * Checks the invariants of this object. * * @return true when the invariants of this objects hold. */
boolean invariants() { for (Interval i : intervals) { if (!i.invariants()) { return false; } } for (int j = 0; j < intervals.size() - 1; j++) { // disjoint and ordered if (intervals.get(j).end >= intervals.get(j + 1).start) { return false; } } return true; }
Very slow but elementary method to determine whether s1 is a subset of s2. For assertions in debugging/testing only.
Params:
  • s1 – the first IntCharSet
  • s2 – the second IntCharSet
Returns:true iff s1 is a subset of s2
/** * Very slow but elementary method to determine whether s1 is a subset of s2. For assertions in * debugging/testing only. * * @param s1 the first IntCharSet * @param s2 the second IntCharSet * @return true iff s1 is a subset of s2 */
static boolean isSubSet(IntCharSet s1, IntCharSet s2) { for (int i : s1) { if (!s2.contains(i)) { return false; } } return true; } @Override public IntCharSetIterator iterator() { return new IntCharSetIterator(); } Interval getFirstInterval() { return intervals.get(0); }
Iterator for enumerating the elements of this IntCharSet
/** Iterator for enumerating the elements of this IntCharSet */
public class IntCharSetIterator implements PrimitiveIterator.OfInt {
Iterator over the Interval list
/** Iterator over the Interval list */
private final Iterator<Interval> intervalsIterator;
Iterator within the current Interval
/** Iterator within the current Interval */
private IntervalIterator current;
New iterator for this IntCharSet
/** New iterator for this IntCharSet */
private IntCharSetIterator() { intervalsIterator = intervals.iterator(); if (intervalsIterator.hasNext()) current = intervalsIterator.next().iterator(); } @Override public boolean hasNext() { return current != null && (current.hasNext() || intervalsIterator.hasNext()); } @Override public int nextInt() { if (!current.hasNext()) current = intervalsIterator.next().iterator(); return current.nextInt(); } } }