package org.jruby;
import static org.jruby.anno.FrameField.BACKREF;
import static org.jruby.anno.FrameField.LASTLINE;
import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.joni.Matcher;
import org.joni.NameEntry;
import org.joni.Option;
import org.joni.Regex;
import org.joni.Region;
import org.joni.Syntax;
import org.joni.WarnCallback;
import org.joni.exception.JOniException;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.common.IRubyWarnings.ID;
import org.jruby.exceptions.RaiseException;
import org.jruby.parser.ReOptions;
import org.jruby.runtime.Block;
import org.jruby.runtime.ClassIndex;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Visibility;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.encoding.EncodingCapable;
import org.jruby.runtime.encoding.MarshalEncoding;
import org.jruby.runtime.marshal.MarshalStream;
import org.jruby.runtime.marshal.UnmarshalStream;
import org.jruby.util.ByteList;
import org.jruby.util.KCode;
import org.jruby.util.RegexpOptions;
import org.jruby.util.RegexpSupport;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;
import org.jruby.util.cli.Options;
import org.jruby.util.io.EncodingUtils;
import org.jruby.util.collections.WeakValuedMap;
import static org.jruby.util.StringSupport.EMPTY_BYTELIST_ARRAY;
import static org.jruby.util.StringSupport.EMPTY_STRING_ARRAY;
import java.util.Iterator;
@JRubyClass(name="Regexp")
public class RubyRegexp extends RubyObject implements ReOptions, EncodingCapable, MarshalEncoding {
Regex pattern;
private ByteList str = ByteList.EMPTY_BYTELIST;
private RegexpOptions options;
public static final int ARG_ENCODING_FIXED = ReOptions.RE_FIXED;
public static final int ARG_ENCODING_NONE = ReOptions.RE_NONE;
public void setLiteral() {
options.setLiteral(true);
}
public void clearLiteral() {
options.setLiteral(false);
}
public boolean isLiteral() {
return options.isLiteral();
}
public boolean isKCodeDefault() {
return options.isKcodeDefault();
}
public void setEncodingNone() {
options.setEncodingNone(true);
}
public void clearEncodingNone() {
options.setEncodingNone(false);
}
public boolean isEncodingNone() {
return options.isEncodingNone();
}
public KCode getKCode() {
return options.getKCode();
}
@Override
public Encoding getEncoding() {
return pattern.getEncoding();
}
@Override
public void setEncoding(Encoding encoding) {
}
@Override
public boolean shouldMarshalEncoding() {
return getEncoding() != ASCIIEncoding.INSTANCE;
}
@Override
public Encoding getMarshalEncoding() {
return getEncoding();
}
static final WeakValuedMap<ByteList, Regex> patternCache = new WeakValuedMap();
static final WeakValuedMap<ByteList, Regex> quotedPatternCache = new WeakValuedMap();
static final WeakValuedMap<ByteList, Regex> preprocessedPatternCache = new WeakValuedMap();
private static Regex makeRegexp(Ruby runtime, ByteList bytes, RegexpOptions options, Encoding enc) {
try {
int p = bytes.getBegin();
return new Regex(bytes.getUnsafeBytes(), p, p + bytes.getRealSize(), options.toJoniOptions(), enc, Syntax.DEFAULT, runtime.getRegexpWarnings());
} catch (Exception e) {
RegexpSupport.raiseRegexpError19(runtime, bytes, enc, options, e.getMessage());
return null;
}
}
public static Regex getRegexpFromCache(Ruby runtime, ByteList bytes, Encoding enc, RegexpOptions options) {
Regex regex = patternCache.get(bytes);
if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
regex = makeRegexp(runtime, bytes, options, enc);
regex.setUserObject(bytes);
patternCache.put(bytes, regex);
return regex;
}
static Regex getQuotedRegexpFromCache(Ruby runtime, RubyString str, RegexpOptions options) {
final ByteList bytes = str.getByteList();
Regex regex = quotedPatternCache.get(bytes);
Encoding enc = str.isAsciiOnly() ? USASCIIEncoding.INSTANCE : bytes.getEncoding();
if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
final ByteList quoted = quote(str);
regex = makeRegexp(runtime, quoted, options, quoted.getEncoding());
regex.setUserObject(quoted);
quotedPatternCache.put(bytes, regex);
return regex;
}
private static Regex getPreprocessedRegexpFromCache(Ruby runtime, ByteList bytes, Encoding enc, RegexpOptions options, RegexpSupport.ErrorMode mode) {
Regex regex = preprocessedPatternCache.get(bytes);
if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
ByteList preprocessed = RegexpSupport.preprocess(runtime, bytes, enc, new Encoding[]{null}, RegexpSupport.ErrorMode.RAISE);
regex = makeRegexp(runtime, preprocessed, options, enc);
regex.setUserObject(preprocessed);
preprocessedPatternCache.put(bytes, regex);
return regex;
}
public static RubyClass createRegexpClass(Ruby runtime) {
RubyClass regexpClass = runtime.defineClass("Regexp", runtime.getObject(), REGEXP_ALLOCATOR);
runtime.setRegexp(regexpClass);
regexpClass.setClassIndex(ClassIndex.REGEXP);
regexpClass.setReifiedClass(RubyRegexp.class);
regexpClass.kindOf = new RubyModule.JavaClassKindOf(RubyRegexp.class);
regexpClass.defineConstant("IGNORECASE", runtime.newFixnum(RE_OPTION_IGNORECASE));
regexpClass.defineConstant("EXTENDED", runtime.newFixnum(RE_OPTION_EXTENDED));
regexpClass.defineConstant("MULTILINE", runtime.newFixnum(RE_OPTION_MULTILINE));
regexpClass.defineConstant("FIXEDENCODING", runtime.newFixnum(RE_FIXED));
regexpClass.defineConstant("NOENCODING", runtime.newFixnum(RE_NONE));
regexpClass.defineAnnotatedMethods(RubyRegexp.class);
regexpClass.getSingletonClass().defineAlias("compile", "new");
return regexpClass;
}
private static ObjectAllocator REGEXP_ALLOCATOR = new ObjectAllocator() {
@Override
public IRubyObject allocate(Ruby runtime, RubyClass klass) {
return new RubyRegexp(runtime, klass);
}
};
public static int matcherSearch(ThreadContext context, Matcher matcher, int start, int range, int option) {
if (!context.runtime.getInstanceConfig().isInterruptibleRegexps()) return matcher.search(start, range, option);
try {
RubyThread thread = context.getThread();
SearchMatchTask task = new SearchMatchTask(thread, start, range, option, false);
return thread.executeTask(context, matcher, task);
} catch (InterruptedException e) {
throw context.runtime.newInterruptedRegexpError("Regexp Interrupted");
}
}
public static int matcherMatch(ThreadContext context, Matcher matcher, int start, int range, int option) {
if (!context.runtime.getInstanceConfig().isInterruptibleRegexps()) return matcher.match(start, range, option);
try {
RubyThread thread = context.getThread();
SearchMatchTask task = new SearchMatchTask(thread, start, range, option, true);
return thread.executeTask(context, matcher, task);
} catch (InterruptedException e) {
throw context.runtime.newInterruptedRegexpError("Regexp Interrupted");
}
}
@Deprecated
public static int matcherSearch(Ruby runtime, Matcher matcher, int start, int range, int option) {
return matcherSearch(runtime.getCurrentContext(), matcher, start, range, option);
}
@Deprecated
public static int matcherMatch(Ruby runtime, Matcher matcher, int start, int range, int option) {
try {
ThreadContext context = runtime.getCurrentContext();
RubyThread thread = context.getThread();
SearchMatchTask task = new SearchMatchTask(thread, start, range, option, true);
return thread.executeTask(context, matcher, task);
} catch (InterruptedException e) {
throw runtime.newInterruptedRegexpError("Regexp Interrupted");
}
}
private static class SearchMatchTask implements RubyThread.Task<Matcher, Integer> {
final RubyThread thread;
final int start;
final int range;
final int option;
final boolean match;
SearchMatchTask(RubyThread thread, int start, int range, int option, boolean match) {
this.thread = thread;
this.start = start;
this.range = range;
this.option = option;
this.match = match;
}
@Override
public Integer run(ThreadContext context, Matcher matcher) throws InterruptedException {
return match ?
matcher.matchInterruptible(start, range, option) :
matcher.searchInterruptible(start, range, option);
}
@Override
public void wakeup(RubyThread thread, Matcher matcher) {
thread.getNativeThread().interrupt();
}
}
@Override
public ClassIndex getNativeClassIndex() {
return ClassIndex.REGEXP;
}
private RubyRegexp(Ruby runtime, RubyClass klass) {
super(runtime, klass);
this.options = new RegexpOptions();
}
RubyRegexp(Ruby runtime) {
super(runtime, runtime.getRegexp());
this.options = new RegexpOptions();
}
public RubyRegexp(Ruby runtime, Regex pattern, ByteList str, RegexpOptions options) {
super(runtime, runtime.getRegexp());
this.pattern = pattern;
this.str = str;
this.options = options;
}
private RubyRegexp(Ruby runtime, ByteList str) {
this(runtime);
assert str != null;
this.str = str;
this.pattern = getRegexpFromCache(runtime, str, str.getEncoding(), RegexpOptions.NULL_OPTIONS);
}
private RubyRegexp(Ruby runtime, ByteList str, RegexpOptions options) {
this(runtime);
assert str != null;
regexpInitialize(str, str.getEncoding(), options);
}
public static RubyRegexp newRegexp(Ruby runtime, String pattern, RegexpOptions options) {
return newRegexp(runtime, ByteList.create(pattern), options);
}
public static RubyRegexp newRegexp(Ruby runtime, ByteList pattern, int options) {
return newRegexp(runtime, pattern, RegexpOptions.fromEmbeddedOptions(options));
}
public static RubyRegexp newRegexp(Ruby runtime, ByteList pattern, RegexpOptions options) {
try {
return new RubyRegexp(runtime, pattern, options.clone());
} catch (RaiseException re) {
throw runtime.newSyntaxError(re.getMessage());
}
}
public static RubyRegexp newRegexpParser(Ruby runtime, ByteList pattern, RegexpOptions options) {
return new RubyRegexp(runtime, pattern, options.clone());
}
public static RubyRegexp newDRegexp(Ruby runtime, RubyString pattern, RegexpOptions options) {
try {
return new RubyRegexp(runtime, pattern.getByteList(), options.clone());
} catch (RaiseException re) {
throw runtime.newRegexpError(re.getMessage());
}
}
public static RubyRegexp newDRegexp(Ruby runtime, RubyString pattern, int joniOptions) {
try {
RegexpOptions options = RegexpOptions.fromJoniOptions(joniOptions);
return new RubyRegexp(runtime, pattern.getByteList(), options);
} catch (RaiseException re) {
throw runtime.newRegexpError(re.getMessage());
}
}
public static RubyRegexp newRegexp(Ruby runtime, ByteList pattern) {
return new RubyRegexp(runtime, pattern);
}
static RubyRegexp newRegexp(Ruby runtime, ByteList str, Regex pattern) {
RubyRegexp regexp = new RubyRegexp(runtime);
assert str != null;
regexp.str = str;
regexp.options = RegexpOptions.fromJoniOptions(pattern.getOptions());
regexp.pattern = pattern;
return regexp;
}
static RubyRegexp newDummyRegexp(Ruby runtime, Regex regex) {
RubyRegexp regexp = new RubyRegexp(runtime);
regexp.pattern = regex;
regexp.str = ByteList.EMPTY_BYTELIST;
regexp.options.setFixed(true);
return regexp;
}
public static RubyRegexp newRegexpFromStr(Ruby runtime, RubyString s, int options) {
RubyRegexp re = (RubyRegexp)runtime.getRegexp().allocate();
re.regexpInitializeString(s, RegexpOptions.fromJoniOptions(options));
return re;
}
public final RegexpOptions getOptions() {
check();
return options;
}
public final Regex getPattern() {
check();
return pattern;
}
private static void encodingMatchError(Ruby runtime, Regex pattern, Encoding strEnc) {
throw runtime.newEncodingCompatibilityError("incompatible encoding regexp match (" +
pattern.getEncoding() + " regexp with " + strEnc + " string)");
}
private Encoding prepareEncoding(RubyString str, boolean warn) {
Encoding enc = str.getEncoding();
int cr = str.scanForCodeRange();
if (cr == StringSupport.CR_BROKEN) {
throw getRuntime().newArgumentError("invalid byte sequence in " + enc);
}
check();
Encoding patternEnc = pattern.getEncoding();
if (patternEnc == enc) {
} else if (cr == StringSupport.CR_7BIT && patternEnc == USASCIIEncoding.INSTANCE) {
enc = patternEnc;
} else if (!enc.isAsciiCompatible()) {
encodingMatchError(getRuntime(), pattern, enc);
} else if (options.isFixed()) {
if (enc != patternEnc &&
(!patternEnc.isAsciiCompatible() ||
cr != StringSupport.CR_7BIT)) encodingMatchError(getRuntime(), pattern, enc);
enc = patternEnc;
}
if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && cr != StringSupport.CR_7BIT) {
metaClass.runtime.getWarnings().warn(ID.REGEXP_MATCH_AGAINST_STRING, "historical binary regexp match /.../n against " + enc + " string");
}
return enc;
}
public final Regex preparePattern(RubyString str) {
Encoding enc = prepareEncoding(str, true);
if (enc == pattern.getEncoding()) return pattern;
return getPreprocessedRegexpFromCache(metaClass.runtime, this.str, enc, options, RegexpSupport.ErrorMode.PREPROCESS);
}
private static void preprocessLight(Ruby runtime, ByteList str, Encoding enc, Encoding[]fixedEnc, RegexpSupport.ErrorMode mode) {
if (enc.isAsciiCompatible()) {
fixedEnc[0] = null;
} else {
fixedEnc[0] = enc;
}
boolean hasProperty = RegexpSupport.unescapeNonAscii(runtime, null, str.getUnsafeBytes(), str.getBegin(), str.getBegin() + str.getRealSize(), enc, fixedEnc, str, mode);
if (hasProperty && fixedEnc[0] == null) fixedEnc[0] = enc;
}
public static void preprocessCheck(Ruby runtime, ByteList bytes) {
RegexpSupport.preprocess(runtime, bytes, bytes.getEncoding(), new Encoding[]{null}, RegexpSupport.ErrorMode.RAISE);
}
public static RubyString preprocessDRegexp(Ruby runtime, RubyString[] strings, int embeddedOptions) {
return preprocessDRegexp(runtime, strings, RegexpOptions.fromEmbeddedOptions(embeddedOptions));
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject[] strings, RegexpOptions options) {
RubyString string = null;
Encoding regexpEnc = null;
for (int i = 0; i < strings.length; i++) {
RubyString str = strings[i].convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, runtime.getCurrentContext().encodingHolder(), str);
string = string == null ? (RubyString) str.dup() : string.append19(str);
}
if (regexpEnc != null) string.setEncoding(regexpEnc);
return string;
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, RegexpOptions options) {
return processElementIntoResult(runtime, null, arg0, options, null, runtime.getCurrentContext().encodingHolder());
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, RegexpOptions options) {
return processElementIntoResult(runtime, null, arg0, arg1, options, null, runtime.getCurrentContext().encodingHolder());
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, RegexpOptions options) {
return processElementIntoResult(runtime, null, arg0, arg1, arg2, options, null, runtime.getCurrentContext().encodingHolder());
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, RegexpOptions options) {
return processElementIntoResult(runtime, null, arg0, arg1, arg2, arg3, options, null, runtime.getCurrentContext().encodingHolder());
}
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, IRubyObject arg4, RegexpOptions options) {
return processElementIntoResult(runtime, null, arg0, arg1, arg2, arg3, arg4, options, null, runtime.getCurrentContext().encodingHolder());
}
private static RubyString processElementIntoResult(
Ruby runtime,
RubyString result,
IRubyObject arg0,
IRubyObject arg1,
IRubyObject arg2,
IRubyObject arg3,
IRubyObject arg4,
RegexpOptions options,
Encoding regexpEnc,
Encoding[] fixedEnc) {
RubyString str = arg0.convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, str);
return processElementIntoResult(runtime, result == null ? str.strDup(runtime) : result.append19(str), arg1, arg2, arg3, arg4, options, regexpEnc, fixedEnc);
}
private static RubyString processElementIntoResult(
Ruby runtime,
RubyString result,
IRubyObject arg0,
IRubyObject arg1,
IRubyObject arg2,
IRubyObject arg3,
RegexpOptions options,
Encoding regexpEnc,
Encoding[] fixedEnc) {
RubyString str = arg0.convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, str);
return processElementIntoResult(runtime, result == null ? str.strDup(runtime) : result.append19(str), arg1, arg2, arg3, options, regexpEnc, fixedEnc);
}
private static RubyString processElementIntoResult(
Ruby runtime,
RubyString result,
IRubyObject arg0,
IRubyObject arg1,
IRubyObject arg2,
RegexpOptions options,
Encoding regexpEnc,
Encoding[] fixedEnc) {
RubyString str = arg0.convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, str);
return processElementIntoResult(runtime, result == null ? str.strDup(runtime) : result.append19(str), arg1, arg2, options, regexpEnc, fixedEnc);
}
private static RubyString processElementIntoResult(
Ruby runtime,
RubyString result,
IRubyObject arg0,
IRubyObject arg1,
RegexpOptions options,
Encoding regexpEnc,
Encoding[] fixedEnc) {
RubyString str = arg0.convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, str);
return processElementIntoResult(runtime, result == null ? str.strDup(runtime) : result.append19(str), arg1, options, regexpEnc, fixedEnc);
}
private static RubyString processElementIntoResult(
Ruby runtime,
RubyString result,
IRubyObject arg0,
RegexpOptions options,
Encoding regexpEnc,
Encoding[] fixedEnc) {
RubyString str = arg0.convertToString();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, str);
result = result == null ? str.strDup(runtime) : result.append19(str);
if (regexpEnc != null) result.setEncoding(regexpEnc);
return result;
}
private static Encoding processDRegexpElement(Ruby runtime, RegexpOptions options, Encoding regexpEnc, Encoding[] fixedEnc, RubyString str) {
Encoding strEnc = str.getEncoding();
if (options.isEncodingNone() && strEnc != ASCIIEncoding.INSTANCE) {
if (str.scanForCodeRange() != StringSupport.CR_7BIT) {
throw runtime.newRegexpError("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
}
strEnc = ASCIIEncoding.INSTANCE;
}
RubyRegexp.preprocessLight(runtime, str.getByteList(), strEnc, fixedEnc, RegexpSupport.ErrorMode.PREPROCESS);
if (fixedEnc[0] != null) {
if (regexpEnc != null && regexpEnc != fixedEnc[0]) {
throw runtime.newRegexpError("encoding mismatch in dynamic regexp: " +
new String(regexpEnc.getName()) + " and " + new String(fixedEnc[0].getName())
);
}
regexpEnc = fixedEnc[0];
}
return regexpEnc;
}
private void check() {
if (pattern == null) throw metaClass.runtime.newTypeError("uninitialized Regexp");
}
@JRubyMethod(meta = true)
public static IRubyObject try_convert(ThreadContext context, IRubyObject recv, IRubyObject args) {
return TypeConverter.convertToTypeWithCheck(args, context.runtime.getRegexp(), "to_regexp");
}
@JRubyMethod(name = {"quote", "escape"}, meta = true)
public static RubyString quote(ThreadContext context, IRubyObject recv, IRubyObject arg) {
final RubyString str = operandCheck(arg);
return RubyString.newStringShared(context.runtime, quote(str));
}
@Deprecated
public static IRubyObject quote19(ThreadContext context, IRubyObject recv, IRubyObject arg) {
return quote(context, recv, arg);
}
static ByteList quote(final RubyString str) {
final ByteList bytes = str.getByteList();
final ByteList qBytes = quote(bytes, str.isAsciiOnly());
if (qBytes == bytes) str.setByteListShared();
return qBytes;
}
private static final int QUOTED_V = 11;
static ByteList quote(ByteList bs, boolean asciiOnly) {
int p = bs.getBegin();
int end = p + bs.getRealSize();
byte[] bytes = bs.getUnsafeBytes();
Encoding enc = bs.getEncoding();
metaFound: do {
while (p < end) {
final int c;
final int cl;
if (enc.isAsciiCompatible()) {
cl = 1;
c = bytes[p] & 0xff;
} else {
cl = StringSupport.preciseLength(enc, bytes, p, end);
if (cl < 0) {
p += StringSupport.length(enc, bytes, p, end);
continue;
}
c = enc.mbcToCode(bytes, p, end);
}
if (!Encoding.isAscii(c)) {
p += StringSupport.length(enc, bytes, p, end);
continue;
}
switch (c) {
case '[': case ']': case '{': case '}':
case '(': case ')': case '|': case '-':
case '*': case '.': case '\\':
case '?': case '+': case '^': case '$':
case ' ': case '#':
case '\t': case '\f': case QUOTED_V: case '\n': case '\r':
break metaFound;
}
p += cl;
}
if (asciiOnly) {
ByteList tmp = bs.shallowDup();
tmp.setEncoding(USASCIIEncoding.INSTANCE);
return tmp;
}
return bs;
} while (false);
ByteList result = new ByteList(end * 2);
result.setEncoding(asciiOnly ? USASCIIEncoding.INSTANCE : bs.getEncoding());
byte[] obytes = result.getUnsafeBytes();
int op = p - bs.getBegin();
System.arraycopy(bytes, bs.getBegin(), obytes, 0, op);
while (p < end) {
final int c;
final int cl;
if (enc.isAsciiCompatible()) {
cl = 1;
c = bytes[p] & 0xff;
} else {
cl = StringSupport.preciseLength(enc, bytes, p, end);
c = enc.mbcToCode(bytes, p, end);
}
if (!Encoding.isAscii(c)) {
int n = StringSupport.length(enc, bytes, p, end);
while (n-- > 0) obytes[op++] = bytes[p++];
continue;
}
p += cl;
switch (c) {
case '[': case ']': case '{': case '}':
case '(': case ')': case '|': case '-':
case '*': case '.': case '\\':
case '?': case '+': case '^': case '$':
case '#':
op += enc.codeToMbc('\\', obytes, op);
break;
case ' ':
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc(' ', obytes, op);
continue;
case '\t':
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc('t', obytes, op);
continue;
case '\n':
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc('n', obytes, op);
continue;
case '\r':
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc('r', obytes, op);
continue;
case '\f':
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc('f', obytes, op);
continue;
case QUOTED_V:
op += enc.codeToMbc('\\', obytes, op);
op += enc.codeToMbc('v', obytes, op);
continue;
}
op += enc.codeToMbc(c, obytes, op);
}
result.setRealSize(op);
return result;
}
@Deprecated
public static ByteList quote19(ByteList bs, boolean asciiOnly) {
return quote(bs, asciiOnly);
}
@JRubyMethod(name = "last_match", meta = true, reads = BACKREF)
public static IRubyObject last_match_s(ThreadContext context, IRubyObject recv) {
return getBackRef(context);
}
public static IRubyObject getBackRef(ThreadContext context) {
IRubyObject backref = context.getBackRef();
if (backref instanceof RubyMatchData) ((RubyMatchData) backref).use();
return backref;
}
@JRubyMethod(name = "last_match", meta = true, reads = BACKREF)
public static IRubyObject last_match_s(ThreadContext context, IRubyObject recv, IRubyObject nth) {
IRubyObject match = context.getBackRef();
if (match.isNil()) return match;
return nth_match(((RubyMatchData)match).backrefNumber(context.runtime, nth), match);
}
@JRubyMethod(name = "union", rest = true, meta = true)
public static IRubyObject union(ThreadContext context, IRubyObject recv, IRubyObject[] args) {
IRubyObject obj;
if (args.length == 1 && !(obj = args[0].checkArrayType()).isNil()) {
RubyArray ary = (RubyArray)obj;
IRubyObject[] tmp = new IRubyObject[ary.size()];
ary.copyInto(tmp, 0);
args = tmp;
}
Ruby runtime = context.runtime;
if (args.length == 0) {
return runtime.getRegexp().newInstance(context, runtime.newString("(?!)"), Block.NULL_BLOCK);
} else if (args.length == 1) {
IRubyObject re = TypeConverter.convertToTypeWithCheck(args[0], runtime.getRegexp(), "to_regexp");
return !re.isNil() ? re : newRegexpFromStr(runtime, quote(context, recv, args[0]), 0);
} else {
boolean hasAsciiOnly = false;
final RubyString source = runtime.newString();
Encoding hasAsciiCompatFixed = null;
Encoding hasAsciiIncompat = null;
for (int i = 0; i < args.length; i++) {
IRubyObject e = args[i];
if (i > 0) source.cat((byte)'|');
IRubyObject v = TypeConverter.convertToTypeWithCheck(e, runtime.getRegexp(), "to_regexp");
final Encoding enc; final ByteList re;
if (v != context.nil) {
RubyRegexp regex = (RubyRegexp) v;
enc = regex.getEncoding();
if (!enc.isAsciiCompatible()) {
if (hasAsciiIncompat == null) {
hasAsciiIncompat = enc;
} else if (hasAsciiIncompat != enc) {
throw runtime.newArgumentError("incompatible encodings: " + hasAsciiIncompat + " and " + enc);
}
} else if (regex.getOptions().isFixed()) {
if (hasAsciiCompatFixed == null) {
hasAsciiCompatFixed = enc;
} else if (hasAsciiCompatFixed != enc) {
throw runtime.newArgumentError("incompatible encodings: " + hasAsciiCompatFixed + " and " + enc);
}
} else {
hasAsciiOnly = true;
}
re = regex.to_s().getByteList();
} else {
RubyString str = e.convertToString();
enc = str.getEncoding();
if (!enc.isAsciiCompatible()) {
if (hasAsciiIncompat == null) {
hasAsciiIncompat = enc;
} else if (hasAsciiIncompat != enc) {
throw runtime.newArgumentError("incompatible encodings: " + hasAsciiIncompat + " and " + enc);
}
} else if (str.isAsciiOnly()) {
hasAsciiOnly = true;
} else {
if (hasAsciiCompatFixed == null) {
hasAsciiCompatFixed = enc;
} else if (hasAsciiCompatFixed != enc) {
throw runtime.newArgumentError("incompatible encodings: " + hasAsciiCompatFixed + " and " + enc);
}
}
re = quote(str);
}
if (hasAsciiIncompat != null) {
if (hasAsciiOnly) {
throw runtime.newArgumentError("ASCII incompatible encoding: " + hasAsciiIncompat);
}
if (hasAsciiCompatFixed != null) {
throw runtime.newArgumentError("incompatible encodings: " + hasAsciiIncompat + " and " + hasAsciiCompatFixed);
}
}
if (i == 0) source.setEncoding(enc);
source.cat(re);
}
if (hasAsciiIncompat != null) {
source.setEncoding(hasAsciiIncompat);
} else if (hasAsciiCompatFixed != null) {
source.setEncoding(hasAsciiCompatFixed);
} else {
source.setEncoding(ASCIIEncoding.INSTANCE);
}
return runtime.getRegexp().newInstance(context, source, Block.NULL_BLOCK);
}
}
@Deprecated
public static IRubyObject union19(ThreadContext context, IRubyObject recv, IRubyObject[] args) {
return union(context, recv, args);
}
@JRubyMethod(required = 1, visibility = Visibility.PRIVATE)
@Override
public IRubyObject initialize_copy(IRubyObject re) {
if (this == re) return this;
checkFrozen();
if (getMetaClass().getRealClass() != re.getMetaClass().getRealClass()) {
throw getRuntime().newTypeError("wrong argument type");
}
RubyRegexp regexp = (RubyRegexp)re;
regexp.check();
return regexpInitialize(regexp.str, regexp.str.getEncoding(), regexp.getOptions());
}
private static int objectAsJoniOptions(IRubyObject arg) {
if (arg instanceof RubyFixnum) return RubyNumeric.fix2int(arg);
if (arg.isTrue()) return RE_OPTION_IGNORECASE;
return 0;
}
@JRubyMethod(name = "initialize", visibility = Visibility.PRIVATE)
public IRubyObject initialize_m(IRubyObject arg) {
if (arg instanceof RubyRegexp) return initializeByRegexp((RubyRegexp)arg);
return regexpInitializeString(arg.convertToString(), new RegexpOptions());
}
@JRubyMethod(name = "initialize", visibility = Visibility.PRIVATE)
public IRubyObject initialize_m(IRubyObject arg0, IRubyObject arg1) {
if (arg0 instanceof RubyRegexp && Options.PARSER_WARN_FLAGS_IGNORED.load()) {
metaClass.runtime.getWarnings().warn(ID.REGEXP_IGNORED_FLAGS, "flags ignored");
return initializeByRegexp((RubyRegexp)arg0);
}
return regexpInitializeString(arg0.convertToString(),
RegexpOptions.fromJoniOptions(objectAsJoniOptions(arg1)));
}
@JRubyMethod(name = "initialize", visibility = Visibility.PRIVATE)
public IRubyObject initialize_m(IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
if (arg0 instanceof RubyRegexp && Options.PARSER_WARN_FLAGS_IGNORED.load()) {
metaClass.runtime.getWarnings().warn(ID.REGEXP_IGNORED_FLAGS, "flags ignored");
return initializeByRegexp((RubyRegexp)arg0);
}
RegexpOptions newOptions = RegexpOptions.fromJoniOptions(objectAsJoniOptions(arg1));
if (!arg2.isNil()) {
ByteList kcodeBytes = arg2.convertToString().getByteList();
if (kcodeBytes.getRealSize() > 0 && (kcodeBytes.get(0) == 'n' || kcodeBytes.get(0) == 'N')) {
newOptions.setEncodingNone(true);
return regexpInitialize(arg0.convertToString().getByteList(), ASCIIEncoding.INSTANCE, newOptions);
} else {
metaClass.runtime.getWarnings().warn("encoding option is ignored - " + kcodeBytes);
}
}
return regexpInitializeString(arg0.convertToString(), newOptions);
}
@Deprecated
public IRubyObject initialize_m19(IRubyObject arg) {
return initialize_m(arg);
}
@Deprecated
public IRubyObject initialize_m19(IRubyObject arg0, IRubyObject arg1) {
return initialize_m(arg0, arg1);
}
@Deprecated
public IRubyObject initialize_m19(IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
return initialize_m(arg0, arg1, arg2);
}
private IRubyObject initializeByRegexp(RubyRegexp regexp) {
RegexpOptions newOptions = regexp.getOptions().clone();
newOptions.setLiteral(false);
return regexpInitialize(regexp.str, regexp.getEncoding(), newOptions);
}
private RubyRegexp regexpInitializeString(RubyString str, RegexpOptions options) {
if (isLiteral()) throw metaClass.runtime.newSecurityError("can't modify literal regexp");
ByteList bytes = str.getByteList();
Encoding enc = bytes.getEncoding();
if (options.isEncodingNone()) {
if (enc != ASCIIEncoding.INSTANCE) {
if (str.scanForCodeRange() != StringSupport.CR_7BIT) {
RegexpSupport.raiseRegexpError19(metaClass.runtime, bytes, enc, options, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
}
enc = ASCIIEncoding.INSTANCE;
}
}
return regexpInitialize(bytes, enc, options);
}
public final RubyRegexp regexpInitialize(ByteList bytes, Encoding enc, RegexpOptions options) {
Ruby runtime = metaClass.runtime;
this.options = options;
checkFrozen();
if (pattern != null) throw runtime.newTypeError("already initialized regexp");
if (enc.isDummy()) RegexpSupport.raiseRegexpError19(runtime, bytes, enc, options, "can't make regexp with dummy encoding");
Encoding[]fixedEnc = new Encoding[]{null};
ByteList unescaped = RegexpSupport.preprocess(runtime, bytes, enc, fixedEnc, RegexpSupport.ErrorMode.RAISE);
if (fixedEnc[0] != null) {
if ((fixedEnc[0] != enc && options.isFixed()) ||
(fixedEnc[0] != ASCIIEncoding.INSTANCE && options.isEncodingNone())) {
RegexpSupport.raiseRegexpError19(runtime, bytes, enc, options, "incompatible character encoding");
}
if (fixedEnc[0] != ASCIIEncoding.INSTANCE) {
options.setFixed(true);
enc = fixedEnc[0];
}
} else if (!options.isFixed()) {
enc = USASCIIEncoding.INSTANCE;
}
if (fixedEnc[0] != null) options.setFixed(true);
if (options.isEncodingNone()) setEncodingNone();
pattern = getRegexpFromCache(runtime, unescaped, enc, options);
assert bytes != null;
str = bytes;
return this;
}
@JRubyMethod
@Override
public RubyFixnum hash() {
check();
int hash = pattern.getOptions();
int len = str.getRealSize();
int p = str.getBegin();
byte[] bytes = str.getUnsafeBytes();
while (len-- > 0) {
hash = hash * 33 + bytes[p++];
}
return metaClass.runtime.newFixnum(hash + (hash >> 5));
}
@JRubyMethod(name = {"==", "eql?"}, required = 1)
@Override
public IRubyObject op_equal(ThreadContext context, IRubyObject other) {
if (this == other) {
return context.tru;
}
if (!(other instanceof RubyRegexp)) {
return context.fals;
}
RubyRegexp otherRegex = (RubyRegexp) other;
check();
otherRegex.check();
return context.runtime.newBoolean(str.equal(otherRegex.str) && options.equals(otherRegex.options));
}
@Deprecated
public IRubyObject op_match219(ThreadContext context) {
return op_match2(context);
}
@JRubyMethod(name = "~", reads = {LASTLINE, BACKREF}, writes = BACKREF)
public IRubyObject op_match2(ThreadContext context) {
Ruby runtime = context.runtime;
IRubyObject line = context.getLastLine();
if (line instanceof RubyString) {
int start = search(context, (RubyString) line, 0, false);
if (start < 0) return context.nil;
return runtime.newFixnum(start);
}
context.setBackRef(context.nil);
return context.nil;
}
@JRubyMethod(name = "===", required = 1, writes = BACKREF)
public IRubyObject eqq(ThreadContext context, IRubyObject arg) {
arg = operandNoCheck(arg);
if (arg == context.nil) {
context.setBackRef(arg);
return context.fals;
}
int start = search(context, (RubyString) arg, 0, false);
return (start < 0) ? context.fals : context.tru;
}
@Deprecated
public IRubyObject eqq19(ThreadContext context, IRubyObject arg) {
return eqq(context, arg);
}
@Override
@JRubyMethod(name = "=~", required = 1, writes = BACKREF, reads = BACKREF)
public IRubyObject op_match(ThreadContext context, IRubyObject str) {
final RubyString[] strp = { null };
int pos = matchPos(context, str, strp, null, 0);
if (pos < 0) return context.nil;
pos = ((RubyString) strp[0]).subLength(pos);
return RubyFixnum.newFixnum(context.runtime, pos);
}
@Deprecated
public IRubyObject op_match19(ThreadContext context, IRubyObject str) {
return op_match(context, str);
}
@JRubyMethod(name = "match", reads = BACKREF)
public IRubyObject match_m(ThreadContext context, IRubyObject str, Block block) {
return matchCommon(context, str, 0, true, block);
}
@Deprecated
public IRubyObject match_m19(ThreadContext context, IRubyObject str) {
return match_m(context, str, Block.NULL_BLOCK);
}
public final IRubyObject match_m(ThreadContext context, IRubyObject str, boolean useBackref) {
return matchCommon(context, str, 0, useBackref, Block.NULL_BLOCK);
}
@Deprecated
public IRubyObject match_m19(ThreadContext context, IRubyObject str, boolean useBackref, Block block) {
return matchCommon(context, str, 0, useBackref, block);
}
@JRubyMethod(name = "match", reads = BACKREF)
public IRubyObject match_m(ThreadContext context, IRubyObject str, IRubyObject pos, Block block) {
return matchCommon(context, str, RubyNumeric.num2int(pos), true, block);
}
@JRubyMethod(name = "match?")
public IRubyObject match_p(ThreadContext context, IRubyObject str) {
return matchP(context, str, 0);
}
@JRubyMethod(name = "match?")
public IRubyObject match_p(ThreadContext context, IRubyObject str, IRubyObject pos) {
return matchP(context, str, RubyNumeric.num2int(pos));
}
private IRubyObject matchCommon(ThreadContext context, IRubyObject str, int pos, boolean setBackref, Block block) {
IRubyObject[] holder = setBackref ? null : new IRubyObject[1];
if (matchPos(context, str, null, holder, pos) < 0) {
return context.nil;
}
IRubyObject backref = getBackRefInternal(context, holder);
if (block.isGiven()) return block.yield(context, backref);
return backref;
}
private int matchPos(ThreadContext context, IRubyObject arg, RubyString[] strp, IRubyObject[] holder, int pos) {
if (arg == context.nil) {
setBackRefInternal(context, holder, context.nil);
return -1;
}
final RubyString str = operandCheck(arg);
if (strp != null) strp[0] = str;
if (pos != 0) {
if (pos < 0) {
pos += str.strLength();
if (pos < 0) return pos;
}
pos = str.rbStrOffset(pos);
}
return search(context, str, pos, false, holder);
}
private RubyBoolean matchP(ThreadContext context, IRubyObject arg, int pos) {
if (arg == context.nil) return context.fals;
RubyString str = arg instanceof RubySymbol ? ((RubySymbol) arg).to_s(context.runtime) : arg.convertToString();
if (pos != 0) {
if (pos < 0) {
pos += str.strLength();
if (pos < 0) return context.fals;
}
pos = str.rbStrOffset(pos);
}
final Regex reg = preparePattern(str);
final ByteList strBL = str.getByteList();
final int beg = strBL.begin();
Matcher matcher = reg.matcherNoRegion(strBL.unsafeBytes(), beg, beg + strBL.realSize());
try {
final int result = matcherSearch(context, matcher, beg + pos, beg + strBL.realSize(), RE_OPTION_NONE);
return result == -1 ? context.fals : context.tru;
} catch (JOniException je) {
throw context.runtime.newRegexpError(je.getMessage());
}
}
public final int search(ThreadContext context, RubyString str, int pos, boolean reverse) {
return search(context, str, pos, reverse, null);
}
@Deprecated
public final int search19(ThreadContext context, RubyString str, int pos, boolean reverse) {
return search(context, str, pos, reverse);
}
@Deprecated
public final int search19(ThreadContext context, RubyString str, int pos, boolean reverse, IRubyObject[] holder) {
return search(context, str, pos, reverse, holder);
}
public final RubyBoolean startWithP(ThreadContext context, RubyString str) {
final ByteList strBL = str.getByteList();
final int beg = strBL.begin();
final Regex reg = preparePattern(str);
IRubyObject match = getBackRefInternal(context, null);
if (match instanceof RubyMatchData) {
if (((RubyMatchData) match).used()) match = context.nil;
}
final Matcher matcher = reg.matcher(strBL.unsafeBytes(), beg, beg + strBL.realSize());
try {
int result = matcherMatch(context, matcher, beg, beg + strBL.realSize(), RE_OPTION_NONE);
if (result == -1) {
setBackRefInternal(context, null, context.nil);
return context.fals;
}
final RubyMatchData matchData;
if (match == context.nil) {
matchData = createMatchData(context, str, matcher, reg);
} else {
matchData = createMatchData(context, str, matcher, reg);
}
matchData.regexp = this;
matchData.infectBy(this);
setBackRefInternal(context, null, matchData);
return context.tru;
} catch (JOniException je) {
throw context.runtime.newRegexpError(je.getMessage());
}
}
public final int search(ThreadContext context, RubyString str, int pos, boolean reverse, IRubyObject[] holder) {
final ByteList strBL = str.getByteList();
final int beg = strBL.begin();
int range = beg;
if (pos > str.size() || pos < 0) {
setBackRefInternal(context, holder, context.nil);
return -1;
}
final Regex reg = preparePattern(str);
IRubyObject match = getBackRefInternal(context, holder);
if (match instanceof RubyMatchData) {
if (((RubyMatchData) match).used()) {
match = context.nil;
}
}
if (!reverse) range += str.size();
final Matcher matcher = reg.matcher(strBL.unsafeBytes(), beg, beg + strBL.realSize());
try {
int result = matcherSearch(context, matcher, beg + pos, range, RE_OPTION_NONE);
if (result == -1) {
setBackRefInternal(context, holder, context.nil);
return -1;
}
final RubyMatchData matchData;
if (match == context.nil) {
matchData = createMatchData(context, str, matcher, reg);
} else {
matchData = createMatchData(context, str, matcher, reg);
}
matchData.regexp = this;
matchData.infectBy(this);
setBackRefInternal(context, holder, matchData);
return result;
} catch (JOniException je) {
throw context.runtime.newRegexpError(je.getMessage());
}
}
private static IRubyObject getBackRefInternal(ThreadContext context, IRubyObject[] holder) {
return holder != null ? holder[0] : context.getBackRef();
}
private static void setBackRefInternal(ThreadContext context, IRubyObject[] holder, IRubyObject match) {
if (holder != null) {
holder[0] = match;
} else {
context.setBackRef(match);
}
}
static RubyMatchData createMatchData(ThreadContext context, RubyString str, Matcher matcher, Regex pattern) {
final RubyMatchData match = new RubyMatchData(context.runtime);
match.initMatchData(context, str, matcher, pattern);
return match;
}
@JRubyMethod
public IRubyObject options() {
return metaClass.runtime.newFixnum(getOptions().toOptions());
}
@JRubyMethod(name = "casefold?")
public IRubyObject casefold_p(ThreadContext context) {
return context.runtime.newBoolean(getOptions().isIgnorecase());
}
@JRubyMethod
public IRubyObject source() {
check();
Encoding enc = (pattern == null) ? str.getEncoding() : pattern.getEncoding();
ByteList newStr = str.dup();
newStr.setEncoding(enc);
return RubyString.newString(metaClass.runtime, newStr).infectBy(this);
}
public final int length() {
return str.getRealSize();
}
@Override
@JRubyMethod(name = "inspect")
public IRubyObject inspect() {
if (pattern == null) return anyToString();
Ruby runtime = metaClass.runtime;
return RubyString.newString(runtime, RegexpSupport.regexpDescription19(runtime, str, options, str.getEncoding()));
}
@Deprecated
public IRubyObject inspect19() {
return inspect();
}
private final static int EMBEDDABLE = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED;
@Override
@JRubyMethod
public RubyString to_s() {
check();
Ruby runtime = metaClass.runtime;
RegexpOptions newOptions = options.clone();
int p = str.getBegin();
int len = str.getRealSize();
byte[] bytes = str.getUnsafeBytes();
ByteList result = new ByteList(len);
result.append((byte)'(').append((byte)'?');
again: do {
if (len >= 4 && bytes[p] == '(' && bytes[p + 1] == '?') {
boolean err = true;
p += 2;
if ((len -= 2) > 0) {
do {
if (bytes[p] == 'm') {
newOptions.setMultiline(true);
} else if (bytes[p] == 'i') {
newOptions.setIgnorecase(true);
} else if (bytes[p] == 'x') {
newOptions.setExtended(true);
} else {
break;
}
p++;
} while (--len > 0);
}
if (len > 1 && bytes[p] == '-') {
++p;
--len;
do {
if (bytes[p] == 'm') {
newOptions.setMultiline(false);
} else if (bytes[p] == 'i') {
newOptions.setIgnorecase(false);
} else if (bytes[p] == 'x') {
newOptions.setExtended(false);
} else {
break;
}
p++;
} while (--len > 0);
}
if (bytes[p] == ')') {
--len;
++p;
continue again;
}
if (bytes[p] == ':' && bytes[p + len - 1] == ')') {
try {
new Regex(bytes, ++p, p + (len -= 2), Option.DEFAULT, str.getEncoding(), Syntax.DEFAULT, WarnCallback.NONE);
err = false;
} catch (JOniException e) {
err = true;
}
}
if (err) {
newOptions = options;
p = str.getBegin();
len = str.getRealSize();
}
}
RegexpSupport.appendOptions(result, newOptions);
if (!newOptions.isEmbeddable()) {
result.append((byte)'-');
if (!newOptions.isMultiline()) result.append((byte)'m');
if (!newOptions.isIgnorecase()) result.append((byte)'i');
if (!newOptions.isExtended()) result.append((byte)'x');
}
result.append((byte)':');
RegexpSupport.appendRegexpString19(runtime, result, bytes, p, len, str.getEncoding(), null);
result.append((byte)')');
return (RubyString) RubyString.newString(runtime, result, getEncoding()).infectBy(this);
} while (true);
}
public String[] getNames() {
int nameLength = pattern.numberOfNames();
if (nameLength == 0) return EMPTY_STRING_ARRAY;
String[] names = new String[nameLength];
int j = 0;
for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
NameEntry e = i.next();
names[j++] = new String(e.name, e.nameP, e.nameEnd - e.nameP).intern();
}
return names;
}
@JRubyMethod
public IRubyObject names(ThreadContext context) {
check();
final Ruby runtime = context.runtime;
if (pattern.numberOfNames() == 0) return runtime.newEmptyArray();
RubyArray ary = RubyArray.newBlankArray(runtime, pattern.numberOfNames());
int index = 0;
for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
NameEntry e = i.next();
RubyString name = RubyString.newStringShared(runtime, e.name, e.nameP, e.nameEnd - e.nameP, pattern.getEncoding());
ary.storeInternal(index++, name);
}
return ary;
}
@JRubyMethod
public IRubyObject named_captures(ThreadContext context) {
check();
final Ruby runtime = context.runtime;
RubyHash hash = RubyHash.newHash(runtime);
if (pattern.numberOfNames() == 0) return hash;
for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
NameEntry e = i.next();
int[] backrefs = e.getBackRefs();
RubyArray ary = RubyArray.newBlankArrayInternal(runtime, backrefs.length);
for (int idx = 0; idx<backrefs.length; idx++) {
ary.storeInternal(idx, RubyFixnum.newFixnum(runtime, backrefs[idx]));
}
RubyString name = RubyString.newStringShared(runtime, e.name, e.nameP, e.nameEnd - e.nameP);
hash.fastASet(name.freeze(context), ary);
}
return hash;
}
@JRubyMethod
public IRubyObject encoding(ThreadContext context) {
Encoding enc = (pattern == null) ? str.getEncoding() : pattern.getEncoding();
return context.runtime.getEncodingService().getEncoding(enc);
}
@JRubyMethod(name = "fixed_encoding?")
public IRubyObject fixed_encoding_p(ThreadContext context) {
return context.runtime.newBoolean(options.isFixed());
}
public static IRubyObject nth_match(int nth, IRubyObject match) {
if (match.isNil()) return match;
RubyMatchData m = (RubyMatchData)match;
m.check();
Ruby runtime = m.getRuntime();
final int start, end;
if (m.regs == null) {
if (nth >= 1 || (nth < 0 && ++nth <= 0)) return runtime.getNil();
start = m.begin;
end = m.end;
} else {
if (nth >= m.regs.numRegs || (nth < 0 && (nth+=m.regs.numRegs) <= 0)) return runtime.getNil();
start = m.regs.beg[nth];
end = m.regs.end[nth];
}
if (start == -1) return runtime.getNil();
RubyString str = m.str.makeShared(runtime, start, end - start);
str.infectBy(m);
return str;
}
public static IRubyObject last_match(IRubyObject match) {
return nth_match(0, match);
}
public static IRubyObject match_pre(IRubyObject match) {
if (match.isNil()) return match;
RubyMatchData m = (RubyMatchData)match;
m.check();
Ruby runtime = m.getRuntime();
if (m.begin == -1) return runtime.getNil();
return m.str.makeShared(runtime, 0, m.begin).infectBy(m);
}
public static IRubyObject match_post(IRubyObject match) {
if (match.isNil()) return match;
RubyMatchData m = (RubyMatchData)match;
m.check();
Ruby runtime = m.getRuntime();
if (m.begin == -1) return runtime.getNil();
return m.str.makeShared(runtime, m.end, m.str.getByteList().getRealSize() - m.end).infectBy(m);
}
public static IRubyObject match_last(IRubyObject match) {
if (match.isNil()) return match;
RubyMatchData m = (RubyMatchData)match;
m.check();
if (m.regs == null || m.regs.beg[0] == -1) return match.getRuntime().getNil();
int i;
for (i = m.regs.numRegs - 1; m.regs.beg[i] == -1 && i > 0; i--);
if (i == 0) return match.getRuntime().getNil();
return nth_match(i, match);
}
private static final int ASCGET(boolean acompat, byte[] sBytes, int s, int e, int[] cl, Encoding strEnc) {
if (acompat) {
cl[0] = 1;
return Encoding.isAscii(sBytes[s]) ? sBytes[s] & 0xFF : -1;
} else {
return EncodingUtils.encAscget(sBytes, s, e, cl, strEnc);
}
}
static RubyString regsub(ThreadContext context, RubyString str, RubyString src, Regex pattern, Matcher matcher) {
return regsub(context, str, src, pattern, matcher.getRegion(), matcher.getBegin(), matcher.getEnd());
}
static RubyString regsub(ThreadContext context, RubyString str, RubyString src, Regex pattern, Region regs,
final int begin, final int end) {
Ruby runtime = context.runtime;
RubyString val = null;
int p, s, e;
int no = 0, clen[] = {0};
Encoding strEnc = EncodingUtils.encGet(context, str);
Encoding srcEnc = EncodingUtils.encGet(context, src);
boolean acompat = EncodingUtils.encAsciicompat(strEnc);
ByteList bs = str.getByteList();
ByteList srcbs = src.getByteList();
byte[] sBytes = bs.getUnsafeBytes();
p = s = bs.getBegin();
e = p + bs.getRealSize();
while (s < e) {
int c = ASCGET(acompat, sBytes, s, e, clen, strEnc);
if (c == -1) {
s += StringSupport.length(strEnc, sBytes, s, e);
continue;
}
int ss = s;
s += clen[0];
if (c != '\\' || s == e) continue;
if (val == null) {
val = RubyString.newString(runtime, new ByteList(ss - p));
}
EncodingUtils.encStrBufCat(runtime, val, sBytes, p, ss - p, strEnc);
c = ASCGET(acompat, sBytes, s, e, clen, strEnc);
if (c == -1) {
s += StringSupport.length(strEnc, sBytes, s, e);
EncodingUtils.encStrBufCat(runtime, val, sBytes, ss, s - ss, strEnc);
p = s;
continue;
}
s += clen[0];
p = s;
switch (c) {
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if (pattern.noNameGroupIsActive(Syntax.RUBY)) {
no = c - '0';
break;
} else {
continue;
}
case 'k':
if (s < e && ASCGET(acompat, sBytes, s, e, clen, strEnc) == '<') {
int name = s + clen[0];
int nameEnd = name;
while (nameEnd < e) {
c = ASCGET(acompat, sBytes, nameEnd, e, clen, strEnc);
if (c == '>') break;
nameEnd += c == -1 ? StringSupport.length(strEnc, sBytes, nameEnd, e) : clen[0];
}
if (nameEnd < e) {
try {
no = pattern.nameToBackrefNumber(sBytes, name, nameEnd, regs);
} catch (JOniException je) {
throw runtime.newIndexError(je.getMessage());
}
p = s = nameEnd + clen[0];
break;
} else {
throw runtime.newRuntimeError("invalid group name reference format");
}
}
EncodingUtils.encStrBufCat(runtime, val, sBytes, ss, s - ss, strEnc);
continue;
case '0': case '&':
no = 0;
break;
case '`':
EncodingUtils.encStrBufCat(runtime, val, srcbs.getUnsafeBytes(), srcbs.getBegin(), begin, srcEnc);
continue;
case '\'':
EncodingUtils.encStrBufCat(runtime, val, srcbs.getUnsafeBytes(), srcbs.getBegin() + end, srcbs.getRealSize() - end, srcEnc);
continue;
case '+':
if (regs != null) {
no = regs.numRegs - 1;
while (regs.beg[no] == -1 && no > 0) no--;
}
if (no == 0) continue;
break;
case '\\':
EncodingUtils.encStrBufCat(runtime, val, sBytes, s - clen[0], clen[0], strEnc);
continue;
default:
EncodingUtils.encStrBufCat(runtime, val, sBytes, ss, s - ss, strEnc);
continue;
}
if (regs != null) {
if (no >= 0) {
if (no >= regs.numRegs) continue;
if (regs.beg[no] == -1) continue;
EncodingUtils.encStrBufCat(runtime, val, srcbs.getUnsafeBytes(), srcbs.getBegin() + regs.beg[no], regs.end[no] - regs.beg[no], srcEnc);
}
} else {
if (no != 0 || begin == -1) continue;
EncodingUtils.encStrBufCat(runtime, val, srcbs.getUnsafeBytes(), srcbs.getBegin() + begin, end - begin, srcEnc);
}
}
if (val == null) return str;
if (p < e) EncodingUtils.encStrBufCat(runtime, val, sBytes, p, e - p, strEnc);
return val;
}
final int adjustStartPos(RubyString str, int pos, boolean reverse) {
check();
return adjustStartPosInternal(str, pattern.getEncoding(), pos, reverse);
}
private static int adjustStartPosInternal(RubyString str, Encoding enc, int pos, boolean reverse) {
ByteList value = str.getByteList();
int len = value.getRealSize();
if (pos > 0 && enc.maxLength() != 1 && pos < len) {
int start = value.getBegin();
if ((reverse ? -pos : len - pos) > 0) {
return enc.rightAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len) - start;
} else {
return enc.leftAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len) - start;
}
}
return pos;
}
private static IRubyObject operandNoCheck(IRubyObject str) {
return regOperand(str, false);
}
private static RubyString operandCheck(IRubyObject str) {
return (RubyString) regOperand(str, true);
}
private static IRubyObject regOperand(IRubyObject str, boolean check) {
if (str instanceof RubySymbol) return ((RubySymbol) str).to_s();
return check ? str.convertToString() : str.checkStringType();
}
@Deprecated
public static RubyRegexp unmarshalFrom(UnmarshalStream input) throws java.io.IOException {
RubyRegexp result = newRegexp(input.getRuntime(), input.unmarshalString(), RegexpOptions.fromJoniOptions(input.readSignedByte()));
input.registerLinkTarget(result);
return result;
}
public static void marshalTo(RubyRegexp regexp, MarshalStream output) throws java.io.IOException {
output.registerLinkTarget(regexp);
output.writeString(regexp.str);
int options = regexp.pattern.getOptions() & EMBEDDABLE;
if (regexp.getOptions().isFixed()) options |= RE_FIXED;
output.writeByte(options);
}
}