/*
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.js.runtime.builtins;
import static com.oracle.truffle.js.runtime.builtins.JSAbstractArray.arrayGetRegexResult;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import com.oracle.truffle.api.CompilerAsserts;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.object.DynamicObject;
import com.oracle.truffle.api.object.DynamicObjectLibrary;
import com.oracle.truffle.api.object.HiddenKey;
import com.oracle.truffle.api.object.Shape;
import com.oracle.truffle.api.profiles.ConditionProfile;
import com.oracle.truffle.js.builtins.RegExpPrototypeBuiltins;
import com.oracle.truffle.js.lang.JavaScriptLanguage;
import com.oracle.truffle.js.runtime.JSContext;
import com.oracle.truffle.js.runtime.JSRealm;
import com.oracle.truffle.js.runtime.array.dyn.LazyRegexResultIndicesArray;
import com.oracle.truffle.js.runtime.interop.JSInteropUtil;
import com.oracle.truffle.js.runtime.objects.JSAttributes;
import com.oracle.truffle.js.runtime.objects.JSObjectUtil;
import com.oracle.truffle.js.runtime.objects.JSProperty;
import com.oracle.truffle.js.runtime.objects.JSShape;
import com.oracle.truffle.js.runtime.objects.Null;
import com.oracle.truffle.js.runtime.objects.PropertyProxy;
import com.oracle.truffle.js.runtime.objects.Undefined;
import com.oracle.truffle.js.runtime.util.Pair;
import com.oracle.truffle.js.runtime.util.TRegexUtil;
import com.oracle.truffle.js.runtime.util.TRegexUtil.InteropReadStringMemberNode;
import com.oracle.truffle.js.runtime.util.TRegexUtil.TRegexMaterializeResultNode;
import com.oracle.truffle.js.runtime.util.TRegexUtil.TRegexResultAccessor;
public final class JSRegExp extends JSNonProxy implements JSConstructorFactory.Default, PrototypeSupplier {
// The single shared instance of this class (the constructor is private).
public static final JSRegExp INSTANCE = new JSRegExp();
// Class name reported by getClassName(), and the derived name of its prototype.
public static final String CLASS_NAME = "RegExp";
public static final String PROTOTYPE_NAME = CLASS_NAME + ".prototype";
// Names of the accessor properties that createPrototype() installs on the prototype
// (some of them only for newer ECMAScript versions).
public static final String MULTILINE = "multiline";
public static final String GLOBAL = "global";
public static final String IGNORE_CASE = "ignoreCase";
public static final String STICKY = "sticky";
public static final String UNICODE = "unicode";
public static final String DOT_ALL = "dotAll";
public static final String SOURCE = "source";
public static final String FLAGS = "flags";
// Name of the data property that create(JSContext, Object) initializes to 0.
public static final String LAST_INDEX = "lastIndex";
// Further property names. INDEX is the one (re)defined by LazyRegexResultIndexProxyProperty.set;
// INPUT, GROUPS and INDICES are not referenced in this file - presumably they name properties
// of match result objects (TODO confirm against callers).
public static final String INPUT = "input";
public static final String GROUPS = "groups";
public static final String INDEX = "index";
public static final String INDICES = "indices";
// Shared instance of the proxy that computes the "index" value on demand.
public static final PropertyProxy LAZY_INDEX_PROXY = new LazyRegexResultIndexProxyProperty();
// Hidden key labelled "regexResult"; not used in this file (presumably keys the regex result
// stored on a groups object - verify against callers).
public static final HiddenKey GROUPS_RESULT_ID = new HiddenKey("regexResult");
/**
 * Since we cannot use nodes here, access to this property is special-cased in
 * {@code com.oracle.truffle.js.nodes.access.PropertyGetNode.LazyRegexResultIndexPropertyGetNode}.
 */
public static class LazyRegexResultIndexProxyProperty implements PropertyProxy {

    // Fetches the regex result attached to the array object and invokes the GET_START
    // boundary lookup on it with group number 0, using uncached nodes/libraries.
    @Override
    public Object get(DynamicObject object) {
        final TRegexUtil.InvokeGetGroupBoundariesMethodNode boundariesNode = TRegexUtil.InvokeGetGroupBoundariesMethodNode.getUncached();
        return boundariesNode.execute(arrayGetRegexResult(object, DynamicObjectLibrary.getUncached()), TRegexUtil.Props.RegexResult.GET_START, 0);
    }

    // Defines "index" as a data property with default attributes holding the assigned value
    // (presumably replacing this proxy on the object); always reports success.
    @TruffleBoundary
    @Override
    public boolean set(DynamicObject object, Object value) {
        final String propertyName = JSRegExp.INDEX;
        JSObjectUtil.defineDataProperty(object, propertyName, value, JSAttributes.getDefault());
        return true;
    }
}
/**
 * Proxy property for a single named capture group of a {@code JSRegExpGroupsObject}.
 * <p>
 * Reading the property computes its value on demand from the regex result stored in the groups
 * object: either an indices array (when the groups object is an "indices" object) or the
 * materialized group for the stored input string. Writing the property defines a data property
 * of the same name with the written value.
 */
public static class LazyNamedCaptureGroupProperty implements PropertyProxy {

    private final String groupName;
    private final int groupIndex;
    private final ConditionProfile isIndicesObject = ConditionProfile.createBinaryProfile();
    private final TRegexMaterializeResultNode materializeNode = TRegexMaterializeResultNode.getUncached();

    /**
     * @param groupName name of the capture group; also the name of the property
     * @param groupIndex number of the capture group within the regex result
     */
    public LazyNamedCaptureGroupProperty(String groupName, int groupIndex) {
        this.groupName = groupName;
        this.groupIndex = groupIndex;
    }

    /** Returns the capture group number this property was created with. */
    public int getGroupIndex() {
        return groupIndex;
    }

    @Override
    public Object get(DynamicObject object) {
        final JSRegExpGroupsObject groupsObject = (JSRegExpGroupsObject) object;
        final Object result = groupsObject.getRegexResult();
        if (!isIndicesObject.profile(groupsObject.isIndices())) {
            // Regular groups object: materialize the group from the input string.
            return materializeNode.materializeGroup(result, groupIndex, groupsObject.getInputString());
        }
        // Indices groups object: produce the indices array for this group instead.
        return LazyRegexResultIndicesArray.getIntIndicesArray(JavaScriptLanguage.getCurrentJSRealm().getContext(), TRegexResultAccessor.getUncached(), result, groupIndex);
    }

    @Override
    public boolean set(DynamicObject object, Object value) {
        // Define a data property with default attributes under the group's name.
        JSObjectUtil.defineDataProperty(object, this.groupName, value, JSAttributes.getDefault());
        return true;
    }
}
// Private: the only instance created in this file is the INSTANCE constant.
private JSRegExp() {
}
/**
 * Returns the compiled regex stored in the given RegExp object.
 *
 * @param thisObj a {@code JSRegExpObject}; this is only checked by an assertion before the cast
 */
public static Object getCompiledRegex(DynamicObject thisObj) {
    assert isJSRegExp(thisObj);
    final JSRegExpObject regExpObject = (JSRegExpObject) thisObj;
    return regExpObject.getCompiledRegex();
}
/**
 * Returns the groups-object factory stored in the given RegExp object.
 *
 * @param thisObj a {@code JSRegExpObject}; this is only checked by an assertion before the cast
 */
public static JSObjectFactory getGroupsFactory(DynamicObject thisObj) {
    assert isJSRegExp(thisObj);
    final JSRegExpObject regExpObject = (JSRegExpObject) thisObj;
    return regExpObject.getGroupsFactory();
}
/**
 * Returns the realm stored in the given RegExp object.
 *
 * @param thisObj a {@code JSRegExpObject}; this is only checked by an assertion before the cast
 */
public static Object getRealm(DynamicObject thisObj) {
    assert isJSRegExp(thisObj);
    final JSRegExpObject regExpObject = (JSRegExpObject) thisObj;
    return regExpObject.getRealm();
}
/**
 * Returns the "legacy features enabled" flag stored in the given RegExp object.
 *
 * @param thisObj a {@code JSRegExpObject}; this is only checked by an assertion before the cast
 */
public static boolean getLegacyFeaturesEnabled(DynamicObject thisObj) {
    assert isJSRegExp(thisObj);
    final JSRegExpObject regExpObject = (JSRegExpObject) thisObj;
    return regExpObject.getLegacyFeaturesEnabled();
}
/**
 * Creates a new JavaScript RegExp object (with a {@code lastIndex} of 0).
 * <p>
 * This overload incurs hitting a {@link TruffleBoundary} when having to examine the
 * {@code compiledRegex} for information about named capture groups. In order to avoid a
 * {@link TruffleBoundary} in cases when your regular expression has no named capture groups,
 * consider using the {@code com.oracle.truffle.js.nodes.intl.CreateRegExpNode}.
 */
public static DynamicObject create(JSContext ctx, Object compiledRegex) {
    // The groups factory is derived from the compiled regex (behind a TruffleBoundary) and
    // handed to the three-argument overload, which builds the object without lastIndex.
    final DynamicObject regExp = create(ctx, compiledRegex, computeGroupsFactory(ctx, compiledRegex));
    // lastIndex starts at 0: writable, but neither enumerable nor configurable.
    JSObjectUtil.putDataProperty(ctx, regExp, LAST_INDEX, 0, JSAttributes.notConfigurableNotEnumerableWritable());
    assert isJSRegExp(regExp);
    return regExp;
}
/**
 * Creates a new JavaScript RegExp object <em>without</em> a {@code lastIndex} property.
 */
public static DynamicObject create(JSContext context, Object compiledRegex, JSObjectFactory groupsFactory) {
    // This overload always creates the object with legacy features enabled.
    final boolean legacyFeaturesEnabled = true;
    return create(context, compiledRegex, groupsFactory, legacyFeaturesEnabled);
}
/**
 * Creates a new JavaScript RegExp object <em>without</em> a {@code lastIndex} property.
 *
 * @param legacyFeaturesEnabled flag stored in the created RegExp object (readable through
 *            {@code getLegacyFeaturesEnabled})
 */
public static DynamicObject create(JSContext context, Object compiledRegex, JSObjectFactory groupsFactory, boolean legacyFeaturesEnabled) {
    // Instantiate through the context's RegExp factory in the context's current realm.
    final DynamicObject result = JSRegExpObject.create(context.getRealm(), context.getRegExpFactory(), compiledRegex, groupsFactory, legacyFeaturesEnabled);
    assert isJSRegExp(result);
    // Report the new object to the context's allocation tracking before handing it out.
    return context.trackAllocation(result);
}
/**
 * Stores {@code regex} in the given RegExp object and recomputes the object's groups factory
 * from it.
 *
 * @param regExp the object to update; cast to {@code JSRegExpObject} without a prior check
 */
private static void initialize(JSContext ctx, DynamicObject regExp, Object regex) {
    final JSRegExpObject target = (JSRegExpObject) regExp;
    target.setCompiledRegex(regex);
    target.setGroupsFactory(computeGroupsFactory(ctx, regex));
}
/**
 * Replaces the compiled regex of an existing RegExp object and refreshes its groups factory
 * accordingly.
 *
 * @param thisObj the RegExp object to update (asserted to be a {@code JSRegExpObject})
 * @param regex the new compiled regex (asserted to be non-null)
 */
public static void updateCompilation(JSContext ctx, DynamicObject thisObj, Object regex) {
    assert isJSRegExp(thisObj);
    assert regex != null;
    initialize(ctx, thisObj, regex);
}
/**
 * Creates a groups object from the given factory, regex result and input string, and reports it
 * to the context's allocation tracking.
 *
 * @param isIndices flag stored in the groups object; {@code LazyNamedCaptureGroupProperty}
 *            consults it to decide whether to produce indices arrays or matched groups
 */
public static DynamicObject createGroupsObject(JSContext context, JSObjectFactory groupsFactory, Object regexResult, String input, boolean isIndices) {
    final DynamicObject groupsObject = JSRegExpGroupsObject.create(context.getRealm(), groupsFactory, regexResult, input, isIndices);
    return context.trackAllocation(groupsObject);
}
/**
 * Reads the named-capture-groups member of the compiled regex and builds a groups-object
 * factory from it.
 *
 * @return the factory, or {@code null} when the groups member is an interop null value
 */
@TruffleBoundary
private static JSObjectFactory computeGroupsFactory(JSContext ctx, Object compiledRegex) {
    final Object groups = TRegexUtil.InteropReadMemberNode.getUncached().execute(compiledRegex, TRegexUtil.Props.CompiledRegex.GROUPS);
    final boolean groupsAreNull = TRegexUtil.InteropIsNullNode.getUncached().execute(groups);
    return groupsAreNull ? null : buildGroupsFactory(ctx, groups);
}
/**
 * Orders (group index, group name) pairs by ascending group index. Used by
 * {@code buildGroupsFactory} to fix the order in which group properties are added to the shape.
 */
private static final Comparator<Pair<Integer, String>> NAMED_GROUPS_COMPARATOR = new Comparator<Pair<Integer, String>>() {
    @Override
    public int compare(Pair<Integer, String> group1, Pair<Integer, String> group2) {
        // Integer.compare yields the same ordering as subtracting the indices but cannot
        // overflow for operands of opposite sign and large magnitude.
        return Integer.compare(group1.getFirst(), group2.getFirst());
    }
};
/**
 * Builds the factory for groups objects of a regex from its named-capture-groups interop
 * object.
 * <p>
 * Every key of {@code namedCaptureGroups} is taken as a group name and its member value as the
 * group's index. One constant proxy property ({@code LazyNamedCaptureGroupProperty}) per group
 * is added to the context's empty groups shape, in ascending order of group index.
 *
 * @return a factory bound to the resulting shape with {@code Null.instance} as prototype
 */
@TruffleBoundary
public static JSObjectFactory buildGroupsFactory(JSContext ctx, Object namedCaptureGroups) {
    final Shape emptyShape = ctx.getRegExpGroupsEmptyShape();

    // Pair each group name with the index stored under that name.
    final List<Object> names = JSInteropUtil.keys(namedCaptureGroups);
    final List<Pair<Integer, String>> indexedNames = new ArrayList<>(names.size());
    for (Object name : names) {
        final String groupName = (String) name;
        final int groupIndex = TRegexUtil.InteropReadIntMemberNode.getUncached().execute(namedCaptureGroups, groupName);
        indexedNames.add(new Pair<>(groupIndex, groupName));
    }
    Collections.sort(indexedNames, NAMED_GROUPS_COMPARATOR);

    // Add one lazily evaluated proxy property per group, ordered by group index.
    final Shape.DerivedBuilder shapeBuilder = Shape.newBuilder(emptyShape);
    for (Pair<Integer, String> entry : indexedNames) {
        final int groupIndex = entry.getFirst();
        final String groupName = entry.getSecond();
        shapeBuilder.addConstantProperty(groupName, new LazyNamedCaptureGroupProperty(groupName, groupIndex), JSAttributes.getDefault() | JSProperty.PROXY);
    }
    final Shape populatedShape = shapeBuilder.build();
    return JSObjectFactory.createBound(ctx, Null.instance, populatedShape);
}
/**
 * Format: '/' pattern '/' flags, flags may contain 'g' (global), 'i' (ignore case) and 'm'
 * (multiline).<br>
 * Example: <code>/ab*c/gi</code>
 */
@TruffleBoundary
public static String prototypeToString(DynamicObject thisObj) {
    final Object compiledRegex = getCompiledRegex(thisObj);
    final InteropReadStringMemberNode stringReader = TRegexUtil.InteropReadStringMemberNode.getUncached();

    // An empty pattern is rendered as "(?:)".
    final String rawPattern = stringReader.execute(compiledRegex, TRegexUtil.Props.CompiledRegex.PATTERN);
    final String shownPattern = rawPattern.isEmpty() ? "(?:)" : rawPattern;

    // The flags text is the SOURCE member of the compiled regex's FLAGS object.
    final Object flagsObject = TRegexUtil.InteropReadMemberNode.getUncached().execute(compiledRegex, TRegexUtil.Props.CompiledRegex.FLAGS);
    final String flagsText = stringReader.execute(flagsObject, TRegexUtil.Props.Flags.SOURCE);

    return "/" + shownPattern + "/" + flagsText;
}
/**
 * Tells whether {@code obj} is a {@code JSRegExpObject}.
 * <p>
 * Non-standard with respect to ES2015, 7.2.8 IsRegExp: the {@code @@match} check is missing.
 */
public static boolean isJSRegExp(Object obj) {
    final boolean isRegExpObject = obj instanceof JSRegExpObject;
    return isRegExpObject;
}
/**
 * Creates the RegExp prototype object for the given realm.
 * <p>
 * For ECMAScript versions below 6 the prototype is itself a RegExp object (compiled from an
 * empty pattern with empty flags) carrying a {@code lastIndex} of 0; otherwise it is an ordinary
 * prototype object. Afterwards the accessor properties, the constructor property and the
 * builtin prototype functions are installed.
 */
@Override
public DynamicObject createPrototype(JSRealm realm, DynamicObject ctor) {
    final JSContext ctx = realm.getContext();
    final DynamicObject prototype;
    if (ctx.getEcmaScriptVersion() >= 6) {
        prototype = JSObjectUtil.createOrdinaryPrototypeObject(realm);
    } else {
        final Shape prototypeShape = JSShape.createPrototypeShape(ctx, INSTANCE, realm.getObjectPrototype());
        prototype = JSRegExpObject.create(prototypeShape, compileEarly(realm, "", ""), realm);
        JSObjectUtil.setOrVerifyPrototype(ctx, prototype, realm.getObjectPrototype());
        JSObjectUtil.putDataProperty(ctx, prototype, LAST_INDEX, 0, JSAttributes.notConfigurableNotEnumerableWritable());
    }

    // Accessors installed for every ECMAScript version.
    for (String accessorName : new String[]{SOURCE, FLAGS, MULTILINE, GLOBAL, IGNORE_CASE}) {
        putRegExpPropertyAccessor(realm, prototype, accessorName);
    }
    // Accessors that depend on the ECMAScript version.
    if (ctx.getEcmaScriptVersion() >= 6) {
        for (String accessorName : new String[]{STICKY, UNICODE}) {
            putRegExpPropertyAccessor(realm, prototype, accessorName);
        }
    }
    if (ctx.getEcmaScriptVersion() >= 9) {
        putRegExpPropertyAccessor(realm, prototype, DOT_ALL);
    }

    // Constructor property and builtin functions.
    JSObjectUtil.putConstructorProperty(ctx, prototype, ctor);
    JSObjectUtil.putFunctionsFromContainer(realm, prototype, RegExpPrototypeBuiltins.BUILTINS);
    return prototype;
}
/**
 * Installs a builtin accessor property called {@code name} on {@code prototype}. The getter is
 * the realm's function registered under that name in the RegExp prototype getter builtins; the
 * setter is {@code Undefined.instance}.
 */
private static void putRegExpPropertyAccessor(JSRealm realm, DynamicObject prototype, String name) {
    final DynamicObject getterFunction = realm.lookupFunction(RegExpPrototypeBuiltins.RegExpPrototypeGetterBuiltins.BUILTINS, name);
    final Object noSetter = Undefined.instance;
    JSObjectUtil.putBuiltinAccessorProperty(prototype, name, getterFunction, noSetter);
}
/**
 * Compiles {@code pattern} with {@code flags} using an uncached compile node and a TRegex
 * engine freshly created from the realm's environment and context options.
 *
 * @return the compiled regex object
 */
private static Object compileEarly(JSRealm realm, String pattern, String flags) {
    final TRegexUtil.CompileRegexNode compileNode = TRegexUtil.CompileRegexNode.getUncached();
    return compileNode.execute(JSContext.createTRegexEngine(realm.getEnv(), realm.getContext().getContextOptions()), pattern, flags);
}
/**
 * Returns the shape for RegExp objects whose prototype is {@code thisObj}, obtained as the
 * prototype's child shape for this class.
 */
@Override
public Shape makeInitialShape(JSContext ctx, DynamicObject thisObj) {
    final Shape initialShape = JSObjectUtil.getProtoChildShape(thisObj, INSTANCE, ctx);
    return initialShape;
}
/**
 * Creates the root shape for groups objects: a shape with a null prototype based on
 * {@code JSOrdinary.BARE_INSTANCE}. Must never be reached from compiled code.
 */
public static Shape makeInitialGroupsObjectShape(JSContext context) {
    CompilerAsserts.neverPartOfCompilation();
    final Shape rootShape = JSShape.createRootWithNullProto(context, JSOrdinary.BARE_INSTANCE);
    return rootShape;
}
// Populates the RegExp constructor. The only step is the call to putConstructorSpeciesGetter,
// which is defined outside this file (presumably it installs the species getter on the
// constructor - verify against its definition).
@Override
public void fillConstructor(JSRealm realm, DynamicObject constructor) {
putConstructorSpeciesGetter(realm, constructor);
}
/**
 * Creates the RegExp constructor together with its prototype for the given realm by delegating
 * to {@code createConstructorAndPrototype} on the shared instance.
 */
public static JSConstructor createConstructor(JSRealm realm) {
    final JSConstructor constructorAndPrototype = INSTANCE.createConstructorAndPrototype(realm);
    return constructorAndPrototype;
}
/** Returns the class name of RegExp objects, {@code "RegExp"}. */
@Override
public String getClassName() {
    return JSRegExp.CLASS_NAME;
}
/** Returns the class name; it is the same for every object, so {@code object} is ignored. */
@Override
public String getClassName(DynamicObject object) {
    final String className = getClassName();
    return className;
}
/** Returns the builtin to-string tag of {@code object}, which is its class name. */
@Override
public String getBuiltinToStringTag(DynamicObject object) {
    final String tag = getClassName(object);
    return tag;
}
/**
 * Returns the display string of a RegExp object: its {@code /pattern/flags} form, wrapped as
 * {@code [RegExp /pattern/flags]} when the context runs in Nashorn compatibility mode.
 * {@code depth} and {@code allowSideEffects} are not used.
 */
@Override
@TruffleBoundary
public String toDisplayStringImpl(DynamicObject obj, int depth, boolean allowSideEffects, JSContext context) {
    if (!context.isOptionNashornCompatibilityMode()) {
        return prototypeToString(obj);
    }
    return "[RegExp " + prototypeToString(obj) + "]";
}
/** Returns the realm's RegExp prototype as the intrinsic default prototype. */
@Override
public DynamicObject getIntrinsicDefaultProto(JSRealm realm) {
    final DynamicObject regExpPrototype = realm.getRegExpPrototype();
    return regExpPrototype;
}
/**
 * Escapes {@code pattern} for use in a RegExp literal.
 *
 * @return {@code "(?:)"} for an empty pattern; the unchanged {@code pattern} instance when no
 *         character needs escaping; otherwise a new escaped string
 */
@TruffleBoundary
public static CharSequence escapeRegExpPattern(CharSequence pattern) {
    if (pattern.length() == 0) {
        return "(?:)";
    }
    // A count of 0 guarantees that the pattern can be returned as is.
    final int extraChars = escapeRegExpExtraCharCount(pattern);
    return extraChars == 0 ? pattern : escapeRegExpPattern(pattern, extraChars);
}
/**
 * Returns the number of extra characters that need to be inserted into {@code pattern} in order
 * for it to be correctly escaped for use in a RegExp literal (according to the requirements of
 * EscapeRegExpPattern).
 *
 * This method satisfies the following property: if its return value is 0, the pattern does not
 * need to be modified by EscapeRegExpPattern. In order to satisfy this property, this method
 * can sometimes return a result that is 1 higher than the advertised value. This is the case
 * when the pattern needs escaping but none of the escapes actually prolong the pattern, as in
 * {@code "\\\n"}, which is escaped as {@code "\\n"} and where both the original and the escaped
 * pattern are of length 2.
 */
private static int escapeRegExpExtraCharCount(CharSequence pattern) {
    // This scan follows the same cases as the two-argument escapeRegExpPattern, but only
    // tallies how much the pattern would grow instead of building the escaped text.
    int growth = 0;
    boolean inCharClass = false;
    for (int pos = 0; pos < pattern.length(); pos++) {
        final char current = pattern.charAt(pos);
        if (current == '\\') {
            // Consume the escaped character together with the backslash.
            assert pos + 1 < pattern.length();
            final char escaped = pattern.charAt(++pos);
            if (escaped == '\n' || escaped == '\r') {
                // Backslash + LF/CR becomes backslash + 'n'/'r': same length, but the pattern
                // is still modified, so the result must be at least 1.
                growth = Math.max(growth, 1);
            } else if (escaped == '\u2028' || escaped == '\u2029') {
                // One separator character is replaced by the five characters 'u' + 4 hex digits.
                growth += 4;
            }
        } else if (current == '\n' || current == '\r') {
            // One character becomes a two-character escape.
            growth += 1;
        } else if (current == '\u2028' || current == '\u2029') {
            // One character becomes a six-character escape.
            growth += 5;
        } else if (current == '/') {
            // A slash only needs a backslash outside of character classes.
            if (!inCharClass) {
                growth += 1;
            }
        } else if (current == '[') {
            inCharClass = true;
        } else if (current == ']') {
            inCharClass = false;
        }
    }
    return growth;
}
/**
 * Implements the EscapeRegExpPattern abstract operation from the ECMAScript spec.
 *
 * @param pattern the input pattern, which is assumed to be non-empty
 * @param extraChars an estimate on the difference of sizes between the original pattern and the
 *            escaped pattern
 * @return the escaped pattern
 */
@TruffleBoundary
private static String escapeRegExpPattern(CharSequence pattern, int extraChars) {
    final StringBuilder escaped = new StringBuilder(pattern.length() + extraChars);
    boolean inCharClass = false;
    for (int pos = 0; pos < pattern.length(); pos++) {
        final char current = pattern.charAt(pos);
        switch (current) {
            case '\\': {
                assert pos + 1 < pattern.length();
                escaped.append(current);
                final char next = pattern.charAt(++pos);
                // Patterns of RegExp objects may contain literal LineTerminators (e.g.
                // RegExp("\n")) as well as identity escapes of literal LineTerminators (e.g.
                // RegExp("\\\n"), valid only without the Unicode flag). LineTerminators are not
                // allowed in RegExp literals, so such identity escapes are replaced with
                // other escapes.
                if (next == '\n') {
                    escaped.append('n');
                } else if (next == '\r') {
                    escaped.append('r');
                } else if (next == '\u2028') {
                    escaped.append("u2028");
                } else if (next == '\u2029') {
                    escaped.append("u2029");
                } else {
                    escaped.append(next);
                }
                break;
            }
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\u2028':
                escaped.append("\\u2028");
                break;
            case '\u2029':
                escaped.append("\\u2029");
                break;
            case '/':
                // The syntax of RegularExpressionLiterals allows forward slashes inside
                // character classes, so they only have to be escaped outside of them.
                escaped.append(inCharClass ? "/" : "\\/");
                break;
            case '[':
            case ']':
                inCharClass = current == '[';
                escaped.append(current);
                break;
            default:
                escaped.append(current);
        }
    }
    return escaped.toString();
}
}