/*
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.regex.literal;
import com.oracle.truffle.regex.RegexLanguage;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.EmptyEndsWith;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.EmptyEquals;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.EmptyIndexOf;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.EmptyStartsWith;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.EndsWith;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.Equals;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.IndexOfString;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.RegionMatches;
import com.oracle.truffle.regex.literal.LiteralRegexExecRootNode.StartsWith;
import com.oracle.truffle.regex.tregex.parser.RegexProperties;
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
import com.oracle.truffle.regex.tregex.parser.ast.visitors.PreCalcResultVisitor;
import com.oracle.truffle.regex.tregex.string.Encodings;
/*
 * This regex engine is designed for very simple cases, where the regular expression can be directly
 * translated to common string operations. It will map expressions to simple index checks
 * (EmptyStartsWith, EmptyEndsWith, EmptyIndexOf) or to the following methods of String (or
 * equivalent nodes in input) whenever possible; the methods are listed in the class Javadoc below.
 */
/**
 * A minimal regex engine for expressions that can be translated directly into common string
 * operations. Expressions are mapped either to plain index checks ({@link EmptyStartsWith},
 * {@link EmptyEndsWith}, {@link EmptyIndexOf}) or, whenever possible, to the node that corresponds
 * to one of the following {@link String} methods (or to equivalent nodes in
 * {@link com.oracle.truffle.regex.tregex.nodes.input}):
 * <ul>
 * <li>{@link String#isEmpty()}: {@link EmptyEquals}</li>
 * <li>{@link String#indexOf(String)}: {@link IndexOfString}</li>
 * <li>{@link String#startsWith(String)}: {@link StartsWith}</li>
 * <li>{@link String#endsWith(String)}: {@link EndsWith}</li>
 * <li>{@link String#equals(Object)}: {@link Equals}</li>
 * <li>{@link String#regionMatches(int, String, int, int)}: {@link RegionMatches}</li>
 * </ul>
 */
public final class LiteralRegexEngine {

    /**
     * Tries to build a literal-matching root node for the given expression.
     *
     * @param language the language instance handed to the created node.
     * @param ast the parsed expression to inspect.
     * @return a node produced by {@code createLiteralNode}, or {@code null} if the expression
     *         fails one of the eligibility checks below (or if {@code createLiteralNode} itself
     *         yields {@code null}).
     */
    public static LiteralRegexExecRootNode createNode(RegexLanguage language, RegexAST ast) {
        RegexProperties props = ast.getProperties();
        // Only literal strings with a fixed code point width are candidates.
        if (!ast.isLiteralString() || !props.isFixedCodePointWidth()) {
            return null;
        }
        // Lone surrogates are only accepted when the encoding is raw UTF-16.
        if (ast.getEncoding() != Encodings.UTF_16_RAW && props.hasLoneSurrogates()) {
            return null;
        }
        /*
         * Bail out if the search string would be huge. This can occur with expressions like
         * /a{1000000}/.
         */
        if (props.hasQuantifiers() && ast.getRoot().getMinPath() > Short.MAX_VALUE) {
            return null;
        }
        return createLiteralNode(language, ast);
    }

    /**
     * Selects the node implementation based on whether the root starts with a caret, ends with a
     * dollar, has a minimum path of zero, and whether the sticky flag is set.
     *
     * @return the selected node, or {@code null} when none of the anchored, empty or sticky cases
     *         applies and the literal's encoded length exceeds 64.
     */
    private static LiteralRegexExecRootNode createLiteralNode(RegexLanguage language, RegexAST ast) {
        PreCalcResultVisitor preCalc = PreCalcResultVisitor.run(ast, true);
        final boolean anchoredAtStart = ast.getRoot().startsWithCaret();
        final boolean anchoredAtEnd = ast.getRoot().endsWithDollar();
        // A minimum path of zero selects the "Empty*" node variants.
        final boolean emptyLiteral = ast.getRoot().getMinPath() == 0;

        if (anchoredAtStart && anchoredAtEnd) {
            return emptyLiteral
                            ? new EmptyEquals(language, ast, preCalc)
                            : new Equals(language, ast, preCalc);
        }
        if (anchoredAtStart) {
            return emptyLiteral
                            ? new EmptyStartsWith(language, ast, preCalc)
                            : new StartsWith(language, ast, preCalc);
        }
        if (anchoredAtEnd) {
            return emptyLiteral
                            ? new EmptyEndsWith(language, ast, preCalc)
                            : new EndsWith(language, ast, preCalc);
        }
        if (emptyLiteral) {
            return new EmptyIndexOf(language, ast, preCalc);
        }
        if (ast.getFlags().isSticky()) {
            return new RegionMatches(language, ast, preCalc);
        }
        // Unanchored, non-sticky search: only literals of encoded length <= 64 get a node here.
        if (preCalc.getLiteral().encodedLength() > 64) {
            return null;
        }
        return new IndexOfString(language, ast, preCalc);
    }
}