/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.8.2                                                             *
 * Copyright (C) 1998-2018  Gerwin Klein <lsf@jflex.de>                    *
 * All rights reserved.                                                    *
 *                                                                         *
 * License: BSD                                                            *
 *                                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.core;

import java.util.List;
import jflex.core.unicode.CharClasses;
import jflex.core.unicode.IntCharSet;
import jflex.exceptions.CharClassException;

Stores a regular expression of rules section in a JFlex-specification.

This base class has no content other than its type.

Author:Gerwin Klein
Version:JFlex 1.8.2
/**
 * Stores a regular expression from the rules section of a JFlex specification.
 *
 * <p>This base class has no content other than its type.
 *
 * @author Gerwin Klein
 * @version JFlex 1.8.2
 */
public class RegExp {
The type of the regular expression. This field will be filled with values from class sym.java (generated by cup)
/**
 * The type of the regular expression. This field will be filled with values from class sym.java
 * (generated by cup).
 */
int type;
Create a new regular expression of the specified type.
Params:
  • type – a value from the cup generated class sym.
/**
 * Creates a regular expression node of the given type.
 *
 * @param type a value from the cup generated class sym.
 */
public RegExp(int type) {
  this.type = type;
}
Returns a String-representation of this regular expression with the specified indentation.
Params:
  • tab – a String that should contain only space characters and that is inserted in front of the standard String-representation of this object.
Returns:a String object.
/**
 * Returns a String-representation of this regular expression with the specified indentation.
 *
 * @param tab a String that should contain only space characters and that is inserted in front of
 *     the standard String-representation of this object.
 * @return {@code tab} followed by the result of {@link #toString()}.
 */
public String print(String tab) {
  final String rendered = toString();
  return tab + rendered;
}

/**
 * Returns the text {@code "type = "} followed by the name of this expression's type.
 *
 * @return the String-representation of this regular expression.
 */
@Override
public String toString() {
  final String name = typeName();
  return "type = " + name;
}
String representation of the type of this regular expression.
/**
 * String representation of the type of this regular expression.
 *
 * @return the entry of {@code sym.terminalNames} at index {@code type}.
 */
public String typeName() {
  final String[] names = sym.terminalNames;
  return names[type];
}
Find out if this regexp is a char class or equivalent to one.
Returns:true if the regexp is equivalent to a char class.
/**
 * Determines whether this regexp is a char class or equivalent to one.
 *
 * <p>Single characters and primitive classes qualify directly; an alternation qualifies when
 * both of its alternatives do.
 *
 * @return true if the regexp is equivalent to a char class.
 */
public boolean isCharClass() {
  if (type == sym.CHAR || type == sym.CHAR_I || type == sym.PRIMCLASS) {
    return true;
  }

  if (type != sym.BAR) {
    return false;
  }

  // a | b is a char class exactly when both a and b are
  final RegExp2 alternation = (RegExp2) this;
  return alternation.r1.isCharClass() && alternation.r2.isCharClass();
}
The approximate number of NFA states this expression will need (only works correctly after macro expansion and without negation)
Params:
  • macros – macro table for expansion
Returns: an int.
/** * The approximate number of NFA states this expression will need (only works correctly after * macro expansion and without negation) * * @param macros macro table for expansion * @return a int. */
public int size(Macros macros) { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: binary = (RegExp2) this; return binary.r1.size(macros) + binary.r2.size(macros) + 2; case sym.CONCAT: binary = (RegExp2) this; return binary.r1.size(macros) + binary.r2.size(macros); case sym.STAR: case sym.PLUS: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) + 2; case sym.QUESTION: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros); case sym.BANG: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) * content.size(macros); // this is only a very rough estimate (worst case 2^n) // exact size too complicated (propably requires construction) case sym.TILDE: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) * content.size(macros) * 3; // see sym.BANG case sym.STRING: case sym.STRING_I: unary = (RegExp1) this; return ((String) unary.content).length() + 1; case sym.CHAR: case sym.CHAR_I: return 2; case sym.CCLASS: case sym.CCLASSNOT: case sym.CCLASSOP: case sym.PRIMCLASS: return 2; case sym.MACROUSE: unary = (RegExp1) this; return macros.getDefinition((String) unary.content).size(macros); default: throw new RegExpException(this); } }
Reverses a string.
/**
 * Reverses a string.
 *
 * @param s the string to reverse
 * @return the result of {@link StringBuilder#reverse()} applied to the characters of {@code s}.
 */
static String revString(String s) {
  final StringBuilder reversed = new StringBuilder(s);
  reversed.reverse();
  return reversed.toString();
}
Recursively convert tilde (upto) expressions into negation and star.
Returns:new RegExp equivalent to the current one, but without upto expressions.
/** * Recursively convert tilde (upto) expressions into negation and star. * * @return new RegExp equivalent to the current one, but without upto expressions. */
public final RegExp resolveTilde() { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: binary = (RegExp2) this; return new RegExp2(sym.BAR, binary.r1.resolveTilde(), binary.r2.resolveTilde()); case sym.CONCAT: binary = (RegExp2) this; return new RegExp2(sym.CONCAT, binary.r1.resolveTilde(), binary.r2.resolveTilde()); case sym.STAR: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.STAR, content.resolveTilde()); case sym.PLUS: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.PLUS, content.resolveTilde()); case sym.QUESTION: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.QUESTION, content.resolveTilde()); case sym.BANG: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.BANG, content.resolveTilde()); case sym.TILDE: // ~a = !([^]* a [^]*) a // uses subexpression sharing unary = (RegExp1) this; content = ((RegExp) unary.content).resolveTilde(); RegExp any_star = new RegExp1(sym.STAR, anyChar()); RegExp neg = new RegExp1( sym.BANG, new RegExp2(sym.CONCAT, any_star, new RegExp2(sym.CONCAT, content, any_star))); return new RegExp2(sym.CONCAT, neg, content); case sym.STRING: case sym.STRING_I: case sym.CHAR: case sym.CHAR_I: case sym.PRIMCLASS: unary = (RegExp1) this; return new RegExp1(unary.type, unary.content); default: throw new RegExpException(this); } }
Returns a regexp that matches any character: [^]
Returns:the regexp for [^]
/**
 * Builds a regexp that matches any character: {@code [^]}
 *
 * @return a new primitive class regexp over {@link IntCharSet#allChars()}, i.e. {@code [^]}
 */
public static RegExp anyChar() {
  final IntCharSet everything = IntCharSet.allChars();
  return new RegExp1(sym.PRIMCLASS, everything);
}
Confirms that the parameter is a RegExp1 of type sym.PRIMCLASS.
Params:
  • r – the RegExp to check
Throws:
  • CharClassException – if r is not a RegExp1 of type sym.PRIMCLASS.
Returns:r cast to RegExp1
/**
 * Verifies that the parameter is a RegExp1 of type sym.PRIMCLASS and returns it as such.
 *
 * @param r the RegExp to check
 * @throws CharClassException if r is not a RegExp1 of type sym.PRIMCLASS.
 * @return r cast to RegExp1
 */
public static RegExp1 checkPrimClass(RegExp r) {
  final boolean isPrimClass = r instanceof RegExp1 && r.type == sym.PRIMCLASS;
  if (isPrimClass) {
    return (RegExp1) r;
  }
  throw new CharClassException("Not normalised " + r);
}
Performs the given set operation on the two IntCharSet parameters.
Params:
  • op – the operation to perform (as a sym constant)
  • l – the left operand of the expression
  • r – the right operand of the expression
  • ctxt – the regular expression containing the provided operator
Throws:
  • RegExpException – for ctxt if the operator is not supported
Returns:a new IntCharSet
/** * Performs the given set operation on the two {@link IntCharSet} parameters. * * @param op the operation to perform (as @{link sym} constant) * @param l the left operator of the expression * @param r the right operator of the expression * @param ctxt the regular expression containing the provided operator * @return a new {@link IntCharSet} * @throws RegExpException for {@code ctxt} if the operator is not supported */
public static IntCharSet performClassOp(int op, IntCharSet l, IntCharSet r, RegExp ctxt) { IntCharSet set; IntCharSet intersection = l.and(r); switch (op) { case sym.INTERSECTION: return intersection; case sym.DIFFERENCE: // IntCharSet.sub() assumes its argument is a subset, so subtract intersection set = IntCharSet.copyOf(l); set.sub(intersection); return set; case sym.SYMMETRICDIFFERENCE: set = IntCharSet.copyOf(l); set.add(r); set.sub(intersection); return set; default: throw new RegExpException(ctxt); } }
Normalise the regular expression to eliminate macro use (expand them), and compound character class expression (compute their content).
Returns:a regexp that contains only IntCharSet char classes and no sym.MACROUSE.
/**
 * Produces a normalised copy of this regular expression: macro uses are replaced by their
 * normalised definitions and compound character class expressions are evaluated to a single
 * primitive class.
 *
 * @param m the macro table used to look up the definition of each {@link sym#MACROUSE}
 * @return a regexp that contains only {@link IntCharSet} char classes and no {@link
 *     sym#MACROUSE}.
 * @throws RegExpException if this expression has a type that is not handled here, or a
 *     character class operator is not supported
 * @throws CharClassException if a member of a character class expression does not normalise to
 *     a primitive class
 */
@SuppressWarnings("unchecked")
public final RegExp normalise(Macros m) {
  switch (type) {
    case sym.BAR:
    case sym.CONCAT: {
      final RegExp2 pair = (RegExp2) this;
      return new RegExp2(type, pair.r1.normalise(m), pair.r2.normalise(m));
    }

    case sym.STAR:
    case sym.PLUS:
    case sym.QUESTION:
    case sym.BANG:
    case sym.TILDE: {
      final RegExp1 wrapper = (RegExp1) this;
      final RegExp inner = (RegExp) wrapper.content;
      return new RegExp1(type, inner.normalise(m));
    }

    case sym.STRING:
    case sym.STRING_I:
    case sym.CHAR:
    case sym.CHAR_I:
    case sym.PRIMCLASS: {
      // leaves are copied unchanged
      final RegExp1 leaf = (RegExp1) this;
      return new RegExp1(type, leaf.content);
    }

    case sym.CCLASS: {
      // add every normalised member class to a fresh set
      final RegExp1 cclass = (RegExp1) this;
      final List<RegExp> members = (List<RegExp>) cclass.content;
      final IntCharSet union = new IntCharSet();
      for (RegExp member : members) {
        final RegExp1 prim = checkPrimClass(member.normalise(m));
        union.add((IntCharSet) prim.content);
      }
      return new RegExp1(sym.PRIMCLASS, union);
    }

    case sym.CCLASSNOT: {
      // start from all characters and remove every normalised member class
      final RegExp1 cclass = (RegExp1) this;
      final List<RegExp> members = (List<RegExp>) cclass.content;
      final IntCharSet remaining = IntCharSet.allChars();
      for (RegExp member : members) {
        final RegExp1 prim = checkPrimClass(member.normalise(m));
        remaining.sub((IntCharSet) prim.content);
      }
      return new RegExp1(sym.PRIMCLASS, remaining);
    }

    case sym.CCLASSOP: {
      // the content is a binary node whose type is the set operator
      final RegExp1 holder = (RegExp1) this;
      final RegExp2 operation = (RegExp2) holder.content;
      final RegExp1 left = checkPrimClass(operation.r1.normalise(m));
      final IntCharSet leftSet = (IntCharSet) left.content;
      final RegExp1 right = checkPrimClass(operation.r2.normalise(m));
      final IntCharSet rightSet = (IntCharSet) right.content;
      final IntCharSet result = performClassOp(operation.type, leftSet, rightSet, this);
      return new RegExp1(sym.PRIMCLASS, result);
    }

    case sym.MACROUSE: {
      final RegExp1 use = (RegExp1) this;
      final String macroName = (String) use.content;
      return m.getDefinition(macroName).normalise(m);
    }

    default:
      throw new RegExpException(this);
  }
}
Make character class partitions based on the classes mentioned in this regexp.

Assumption: regexp is normalised.

/**
 * Registers the character classes mentioned in this regexp with the given partition.
 *
 * <p>Assumption: regexp is normalised.
 *
 * @param c the character classes on which {@code makeClass} is called for each primitive class
 * @param caseless passed through unchanged to {@code makeClass}
 * @throws CharClassException if this expression has a type that is not handled here
 */
public final void makeCCLs(CharClasses c, boolean caseless) {
  switch (type) {
    case sym.BAR:
    case sym.CONCAT: {
      final RegExp2 pair = (RegExp2) this;
      pair.r1.makeCCLs(c, caseless);
      pair.r2.makeCCLs(c, caseless);
      break;
    }

    case sym.STAR:
    case sym.PLUS:
    case sym.QUESTION:
    case sym.BANG:
    case sym.TILDE: {
      final RegExp1 wrapper = (RegExp1) this;
      final RegExp inner = (RegExp) wrapper.content;
      inner.makeCCLs(c, caseless);
      break;
    }

    case sym.STRING:
    case sym.STRING_I:
    case sym.CHAR:
    case sym.CHAR_I:
      // nothing is registered for these types
      break;

    case sym.PRIMCLASS: {
      final RegExp1 primClass = (RegExp1) this;
      final IntCharSet chars = (IntCharSet) primClass.content;
      c.makeClass(chars, caseless);
      break;
    }

    default:
      throw new CharClassException("makeCCLs: unexpected regexp " + this);
  }
}
Creates a new regexp that matches the reverse text of this one.
Returns:the reverse regexp
/**
 * Builds a new regexp that matches the reverse text of this one.
 *
 * @return the reverse regexp
 * @throws RegExpException if this expression has a type that is not handled here
 */
public final RegExp rev() {
  switch (type) {
    case sym.BAR: {
      final RegExp2 alternation = (RegExp2) this;
      return new RegExp2(sym.BAR, alternation.r1.rev(), alternation.r2.rev());
    }

    case sym.CONCAT: {
      // the operands swap places in the reversed concatenation
      final RegExp2 sequence = (RegExp2) this;
      return new RegExp2(sym.CONCAT, sequence.r2.rev(), sequence.r1.rev());
    }

    case sym.STAR:
    case sym.PLUS:
    case sym.QUESTION:
    case sym.BANG: {
      final RegExp1 wrapper = (RegExp1) this;
      final RegExp inner = (RegExp) wrapper.content;
      return new RegExp1(type, inner.rev());
    }

    case sym.TILDE:
      // upto is first rewritten via resolveTilde(), then the result is reversed
      return resolveTilde().rev();

    case sym.STRING:
    case sym.STRING_I: {
      final RegExp1 literal = (RegExp1) this;
      final String text = (String) literal.content;
      return new RegExp1(literal.type, revString(text));
    }

    case sym.CHAR:
    case sym.CHAR_I:
    case sym.PRIMCLASS: {
      // these are copied unchanged
      final RegExp1 leaf = (RegExp1) this;
      return new RegExp1(leaf.type, leaf.content);
    }

    default:
      throw new RegExpException(this);
  }
}
}