// de.jflex/jflex/1.8.2 : jflex/core/RegExp.java
//
// RegExp
// https://jflex-de.github.io/jflex/jflex/: JFlex is a lexical analyzer generator (also known as
// scanner generator) for Java™, written in Java. (JFlex)
// Gerwin Klein
// Steve Rowe
// Régis Décamps
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.8.2                                                             *
 * Copyright (C) 1998-2018  Gerwin Klein <lsf@jflex.de>                    *
 * All rights reserved.                                                    *
 *                                                                         *
 * License: BSD                                                            *
 *                                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.core;

import java.util.List;
import jflex.core.unicode.CharClasses;
import jflex.core.unicode.IntCharSet;
import jflex.exceptions.CharClassException;

/**
 * Stores a regular expression from the rules section of a JFlex specification.
 *
 * <p>This base class has no content other than its type.
 *
 * @author Gerwin Klein
 * @version JFlex 1.8.2
 */
public class RegExp {

  /**
   * The type of the regular expression. This field is filled with values from the class {@code
   * sym} (sym.java, generated by cup).
   */
  int type;

  /**
   * Creates a new regular expression of the specified type.
   *
   * @param type a value from the cup generated class sym.
   */
  public RegExp(int type) {
    // Remember the node kind; the methods of this class switch on this value.
    this.type = type;
  }

  /**
   * Returns a String representation of this regular expression with the specified indentation.
   *
   * @param tab a String that should contain only space characters and that is inserted in front of
   *     the standard String representation of this object.
   * @return a {@link java.lang.String} object.
   */
  public String print(String tab) {
    // String concatenation converts `this` through toString(), so the indentation is
    // simply placed in front of the standard representation.
    return tab + this;
  }

  @Override
  public String toString() {
    // The base class has nothing but its type to show.
    final String name = typeName();
    return "type = " + name;
  }

  /** String representation of the type of this regular expression. */
  public String typeName() {
    // Index the sym.terminalNames table with this node's type constant.
    return sym.terminalNames[type];
  }

  /**
   * Finds out if this regexp is a char class or equivalent to one.
   *
   * @return true if the regexp is equivalent to a char class.
   */
  public boolean isCharClass() {
    // Single characters and primitive classes are char classes by themselves.
    if (type == sym.CHAR || type == sym.CHAR_I || type == sym.PRIMCLASS) {
      return true;
    }

    // Of the remaining kinds only an alternation can qualify.
    if (type != sym.BAR) {
      return false;
    }

    // An alternation qualifies when both of its branches do.
    final RegExp2 alternation = (RegExp2) this;
    return alternation.r1.isCharClass() && alternation.r2.isCharClass();
  }

  /**
   * The approximate number of NFA states this expression will need (only works correctly after
   * macro expansion and without negation).
   *
   * @param macros macro table for expansion
   * @return the estimated number of states, as an int.
   */
  public int size(Macros macros) {
    RegExp1 unary;
    RegExp2 binary;
    RegExp content;
    int inner;

    switch (type) {
      case sym.BAR:
        // both alternatives plus 2
        binary = (RegExp2) this;
        return binary.r1.size(macros) + binary.r2.size(macros) + 2;

      case sym.CONCAT:
        // plain sum of both parts
        binary = (RegExp2) this;
        return binary.r1.size(macros) + binary.r2.size(macros);

      case sym.STAR:
      case sym.PLUS:
        // operand plus 2
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        return content.size(macros) + 2;

      case sym.QUESTION:
        // same estimate as the operand
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        return content.size(macros);

      case sym.BANG:
        // This is only a very rough estimate (worst case 2^n); the exact size is too
        // complicated (probably requires construction).
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        // Evaluate the operand once: asking twice doubles the recursive work at every
        // nested negation. Nothing in this method mutates state, so the result is the
        // same (assumes Macros.getDefinition is a plain lookup -- not visible here).
        inner = content.size(macros);
        return inner * inner;

      case sym.TILDE:
        // Same rough estimate as for sym.BANG, times 3.
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        // Evaluated once, for the same reason as in the sym.BANG case.
        inner = content.size(macros);
        return inner * inner * 3;

      case sym.STRING:
      case sym.STRING_I:
        // length of the string plus 1
        unary = (RegExp1) this;
        return ((String) unary.content).length() + 1;

      case sym.CHAR:
      case sym.CHAR_I:
      case sym.CCLASS:
      case sym.CCLASSNOT:
      case sym.CCLASSOP:
      case sym.PRIMCLASS:
        // single characters and all kinds of character classes count as 2
        return 2;

      case sym.MACROUSE:
        // size of the macro's definition, looked up by name
        unary = (RegExp1) this;
        return macros.getDefinition((String) unary.content).size(macros);

      default:
        throw new RegExpException(this);
    }
  }

  /** Reverses a string. */
  static String revString(String s) {
    // StringBuilder.reverse() does the work; the input string itself is not modified.
    final StringBuilder buffer = new StringBuilder(s);
    buffer.reverse();
    return buffer.toString();
  }

  /**
   * Recursively converts tilde (upto) expressions into negation and star.
   *
   * @return new RegExp equivalent to the current one, but without upto expressions.
   */
  public final RegExp resolveTilde() {
    switch (type) {
      case sym.BAR:
      case sym.CONCAT:
        {
          // binary node: rebuild it with both operands resolved, left one first
          final RegExp2 pair = (RegExp2) this;
          return new RegExp2(type, pair.r1.resolveTilde(), pair.r2.resolveTilde());
        }

      case sym.STAR:
      case sym.PLUS:
      case sym.QUESTION:
      case sym.BANG:
        {
          // unary operator: keep the operator, resolve the operand
          final RegExp1 op = (RegExp1) this;
          final RegExp operand = (RegExp) op.content;
          return new RegExp1(type, operand.resolveTilde());
        }

      case sym.TILDE:
        {
          // ~a = !([^]* a [^]*) a
          // The resolved operand and the [^]* node are each referenced twice in the
          // result (subexpression sharing).
          final RegExp1 upto = (RegExp1) this;
          final RegExp target = ((RegExp) upto.content).resolveTilde();
          final RegExp anyStar = new RegExp1(sym.STAR, anyChar());

          final RegExp containsTarget =
              new RegExp2(sym.CONCAT, anyStar, new RegExp2(sym.CONCAT, target, anyStar));
          final RegExp notContainsTarget = new RegExp1(sym.BANG, containsTarget);

          return new RegExp2(sym.CONCAT, notContainsTarget, target);
        }

      case sym.STRING:
      case sym.STRING_I:
      case sym.CHAR:
      case sym.CHAR_I:
      case sym.PRIMCLASS:
        {
          // leaf: a new node with the same type and the same content object
          final RegExp1 leaf = (RegExp1) this;
          return new RegExp1(leaf.type, leaf.content);
        }

      default:
        throw new RegExpException(this);
    }
  }

  /**
   * Returns a regexp that matches any character: {@code [^]}
   *
   * @return the regexp for {@code [^]}
   */
  public static RegExp anyChar() {
    // A primitive class whose content is the set returned by IntCharSet.allChars().
    final IntCharSet everything = IntCharSet.allChars();
    return new RegExp1(sym.PRIMCLASS, everything);
  }

  /**
   * Confirms that the parameter is a RegExp1 of type sym.PRIMCLASS.
   *
   * @param r the RegExp to check
   * @return r cast to RegExp1
   * @throws CharClassException if r is not a RegExp1 or not of type sym.PRIMCLASS.
   */
  public static RegExp1 checkPrimClass(RegExp r) {
    // Reject anything that is not a RegExp1 (including null) or has another type.
    final boolean wrongClass = !(r instanceof RegExp1);
    if (wrongClass || r.type != sym.PRIMCLASS) {
      throw new CharClassException("Not normalised " + r);
    }
    return (RegExp1) r;
  }

  /**
   * Performs the given set operation on the two {@link IntCharSet} parameters.
   *
   * @param op the operation to perform (as {@link sym} constant)
   * @param l the left operand of the expression
   * @param r the right operand of the expression
   * @param ctxt the regular expression containing the provided operator
   * @return a new {@link IntCharSet}
   * @throws RegExpException for {@code ctxt} if the operator is not supported
   */
  public static IntCharSet performClassOp(int op, IntCharSet l, IntCharSet r, RegExp ctxt) {
    // Computed up front for every operator, including unsupported ones.
    final IntCharSet common = l.and(r);

    if (op == sym.INTERSECTION) {
      return common;
    }

    if (op == sym.DIFFERENCE) {
      // IntCharSet.sub() assumes its argument is a subset, so subtract the intersection
      // from a copy of the left operand rather than the right operand itself.
      final IntCharSet result = IntCharSet.copyOf(l);
      result.sub(common);
      return result;
    }

    if (op == sym.SYMMETRICDIFFERENCE) {
      // union of both operands, minus what they have in common
      final IntCharSet result = IntCharSet.copyOf(l);
      result.add(r);
      result.sub(common);
      return result;
    }

    throw new RegExpException(ctxt);
  }

  /**
   * Normalises the regular expression to eliminate macro uses (by expanding them) and compound
   * character class expressions (by computing their content).
   *
   * @param m the macro table used to look up the definition of each {@link sym#MACROUSE}
   * @return a regexp that contains only {@link IntCharSet} char classes and no {@link
   *     sym#MACROUSE}.
   */
  @SuppressWarnings("unchecked")
  public final RegExp normalise(Macros m) {
    switch (type) {
      case sym.BAR:
      case sym.CONCAT:
        {
          // binary node: same operator, both operands normalised (left one first)
          final RegExp2 pair = (RegExp2) this;
          return new RegExp2(type, pair.r1.normalise(m), pair.r2.normalise(m));
        }

      case sym.STAR:
      case sym.PLUS:
      case sym.QUESTION:
      case sym.BANG:
      case sym.TILDE:
        {
          // unary operator: same operator, operand normalised
          final RegExp1 op = (RegExp1) this;
          final RegExp operand = (RegExp) op.content;
          return new RegExp1(type, operand.normalise(m));
        }

      case sym.STRING:
      case sym.STRING_I:
      case sym.CHAR:
      case sym.CHAR_I:
      case sym.PRIMCLASS:
        {
          // leaf: a new node with the same type and the same content object
          final RegExp1 leaf = (RegExp1) this;
          return new RegExp1(type, leaf.content);
        }

      case sym.CCLASS:
        {
          // union of all members, each of which must normalise to a primitive class
          final List<RegExp> members = (List<RegExp>) ((RegExp1) this).content;
          final IntCharSet union = new IntCharSet();
          for (RegExp member : members) {
            final RegExp1 prim = checkPrimClass(member.normalise(m));
            union.add((IntCharSet) prim.content);
          }
          return new RegExp1(sym.PRIMCLASS, union);
        }

      case sym.CCLASSNOT:
        {
          // start from all characters and remove every member in turn
          // NOTE(review): performClassOp states that IntCharSet.sub() assumes its argument
          // is a subset; with overlapping members a later member may no longer be a subset
          // of what remains -- confirm that sub() tolerates this.
          final List<RegExp> members = (List<RegExp>) ((RegExp1) this).content;
          final IntCharSet remaining = IntCharSet.allChars();
          for (RegExp member : members) {
            final RegExp1 prim = checkPrimClass(member.normalise(m));
            remaining.sub((IntCharSet) prim.content);
          }
          return new RegExp1(sym.PRIMCLASS, remaining);
        }

      case sym.CCLASSOP:
        {
          // the content is a binary node whose type names the set operation
          final RegExp2 operation = (RegExp2) ((RegExp1) this).content;
          final RegExp1 left = checkPrimClass(operation.r1.normalise(m));
          final IntCharSet leftSet = (IntCharSet) left.content;
          final RegExp1 right = checkPrimClass(operation.r2.normalise(m));
          final IntCharSet rightSet = (IntCharSet) right.content;
          final IntCharSet result = performClassOp(operation.type, leftSet, rightSet, this);
          return new RegExp1(sym.PRIMCLASS, result);
        }

      case sym.MACROUSE:
        {
          // replace the use by the normalised definition of the named macro
          final String macroName = (String) ((RegExp1) this).content;
          return m.getDefinition(macroName).normalise(m);
        }

      default:
        throw new RegExpException(this);
    }
  }

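  /**
   * Makes character class partitions based on the classes mentioned in this regexp.
   *
   * <p>Assumption: regexp is normalised.
   *
   * @param c the {@link CharClasses} on which {@code makeClass} is called for every primitive
   *     class in this regexp
   * @param caseless passed on unchanged to {@code CharClasses.makeClass}
   * @throws CharClassException if the regexp contains a node type that is not expected here
   */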
  public final void makeCCLs(CharClasses c, boolean caseless) {
    switch (type) {
      case sym.BAR:
      case sym.CONCAT:
        {
          // binary node: visit the left operand, then the right one
          final RegExp2 pair = (RegExp2) this;
          pair.r1.makeCCLs(c, caseless);
          pair.r2.makeCCLs(c, caseless);
          break;
        }

      case sym.STAR:
      case sym.PLUS:
      case sym.QUESTION:
      case sym.BANG:
      case sym.TILDE:
        {
          // unary operator: visit the operand
          final RegExp operand = (RegExp) ((RegExp1) this).content;
          operand.makeCCLs(c, caseless);
          break;
        }

      case sym.STRING:
      case sym.STRING_I:
      case sym.CHAR:
      case sym.CHAR_I:
        // these node kinds register nothing here
        break;

      case sym.PRIMCLASS:
        {
          // hand the set of this primitive class to the CharClasses object
          final IntCharSet members = (IntCharSet) ((RegExp1) this).content;
          c.makeClass(members, caseless);
          break;
        }

      default:
        throw new CharClassException("makeCCLs: unexpected regexp " + this);
    }
  }

  /**
   * Creates a new regexp that matches the reverse text of this one.
   *
   * @return the reverse regexp
   */
  public final RegExp rev() {
    switch (type) {
      case sym.BAR:
        {
          // alternation: reverse each branch, keep their order
          final RegExp2 alt = (RegExp2) this;
          return new RegExp2(sym.BAR, alt.r1.rev(), alt.r2.rev());
        }

      case sym.CONCAT:
        {
          // concatenation: reverse each part and swap them
          final RegExp2 seq = (RegExp2) this;
          return new RegExp2(sym.CONCAT, seq.r2.rev(), seq.r1.rev());
        }

      case sym.STAR:
      case sym.PLUS:
      case sym.QUESTION:
      case sym.BANG:
        {
          // unary operator: keep the operator, reverse the operand
          final RegExp operand = (RegExp) ((RegExp1) this).content;
          return new RegExp1(type, operand.rev());
        }

      case sym.TILDE:
        // rewrite the upto expression first, then reverse the result
        return resolveTilde().rev();

      case sym.STRING:
      case sym.STRING_I:
        {
          // string literal: same type, reversed text
          final RegExp1 text = (RegExp1) this;
          return new RegExp1(text.type, revString((String) text.content));
        }

      case sym.CHAR:
      case sym.CHAR_I:
      case sym.PRIMCLASS:
        {
          // single character or class: a new node with the same type and content
          final RegExp1 leaf = (RegExp1) this;
          return new RegExp1(leaf.type, leaf.content);
        }

      default:
        throw new RegExpException(this);
    }
  }
}
// Params: tab – a String that should contain only space characters and that is inserted in front
//   of the standard String-representation of this object.
// Returns: a `String` object.
// Params: r – the RegExp to check
// Throws: CharClassException – if r is not a RegExp1 or of type sym.PRIMCLASS.
// Returns: r cast to RegExp1
// Params: op – the operation to perform (as {@link sym} constant) l – the left operand of the
//   expression r – the right operand of the expression ctxt – the regular expression containing
//   the provided operator
// Throws: RegExpException – for `ctxt` if the operator is not supported
// Returns: a new `IntCharSet`
//
// de.jflex/ jflex/ 1.8.2/ jflex/core/RegExp.java