/*
 ***** BEGIN LICENSE BLOCK *****
 * Version: EPL 2.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v20.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/

package org.jruby.ext.strscan;

import org.jcodings.Encoding;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
import org.joni.Region;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBoolean;
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyMatchData;
import org.jruby.RubyNumeric;
import org.jruby.RubyObject;
import org.jruby.RubyRegexp;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.RubyThread;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.common.IRubyWarnings.ID;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;

import static org.jruby.runtime.Visibility.PRIVATE;

Author:kscott
/** * @author kscott * */
@JRubyClass(name="StringScanner") public class RubyStringScanner extends RubyObject { private RubyString str; private int pos = 0; private int lastPos = -1; private Region regs; private Regex pattern; private int beg = -1; private int end = -1; // not to be confused with RubyObject's flags private int scannerFlags; private static final int MATCHED_STR_SCN_F = 1 << 11; private static ObjectAllocator STRINGSCANNER_ALLOCATOR = new ObjectAllocator() { @Override public IRubyObject allocate(Ruby runtime, RubyClass klass) { return new RubyStringScanner(runtime, klass); } }; public static RubyClass createScannerClass(final Ruby runtime) { RubyClass scannerClass = runtime.defineClass("StringScanner", runtime.getObject(), STRINGSCANNER_ALLOCATOR); scannerClass.defineAnnotatedMethods(RubyStringScanner.class); ThreadContext context = runtime.getCurrentContext(); scannerClass.setConstant("Version", runtime.newString("0.7.0").freeze(context)); scannerClass.setConstant("Id", runtime.newString("$Id: strscan.c 13506 2007-09-24 08:56:24Z nobu $").freeze(context)); RubyClass standardError = runtime.getStandardError(); RubyClass error = scannerClass.defineClassUnder( "Error", standardError, standardError.getAllocator()); RubyClass objClass = runtime.getObject(); if (!objClass.isConstantDefined("ScanError")) { objClass.defineConstant("ScanError", error); } return scannerClass; } private void clearMatched() { scannerFlags &= ~MATCHED_STR_SCN_F; } private void setMatched() { scannerFlags |= MATCHED_STR_SCN_F; } private boolean isMatched() { return (scannerFlags & MATCHED_STR_SCN_F) != 0; } private void check() { if (str == null) throw getRuntime().newArgumentError("uninitialized StringScanner object"); } protected RubyStringScanner(Ruby runtime, RubyClass type) { super(runtime, type); } // second argument is allowed, but ignored (MRI) @JRubyMethod(required = 1, optional = 1, visibility = PRIVATE) public IRubyObject initialize(IRubyObject[] args, Block unusedBlock) { str = args[0].convertToString(); return this; } @JRubyMethod(visibility = PRIVATE) @Override public IRubyObject initialize_copy(IRubyObject other) { if (this == other) return this; if (!(other instanceof RubyStringScanner)) { throw getRuntime().newTypeError("wrong argument type " + other.getMetaClass() + " (expected StringScanner)"); } RubyStringScanner otherScanner = (RubyStringScanner)other; str = otherScanner.str; pos = otherScanner.pos; lastPos = otherScanner.lastPos; scannerFlags = otherScanner.scannerFlags; regs = otherScanner.regs != null ? otherScanner.regs.clone() : null; pattern = otherScanner.pattern; beg = otherScanner.beg; end = otherScanner.end; return this; } @JRubyMethod(name = "reset") public IRubyObject reset() { check(); pos = 0; clearMatched(); return this; } @JRubyMethod(name = "terminate") public IRubyObject terminate() { check(); pos = str.getByteList().getRealSize(); clearMatched(); return this; } @JRubyMethod(name = "clear") public IRubyObject clear(ThreadContext context) { check(); Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#clear is obsolete; use #terminate instead"); } return terminate(); } @JRubyMethod(name = "string") public RubyString string() { return str; } @JRubyMethod(name = "string=", required = 1) public IRubyObject set_string(ThreadContext context, IRubyObject str) { this.str = RubyString.stringValue(str); pos = 0; clearMatched(); return str; } @JRubyMethod(name = {"concat", "<<"}, required = 1) public IRubyObject concat(IRubyObject obj) { check(); str.append(obj.convertToString()); return this; } @JRubyMethod(name = {"pos", "pointer"}) public RubyFixnum pos() { check(); return RubyFixnum.newFixnum(getRuntime(), pos); } @JRubyMethod(name = {"pos=", "pointer="}) public IRubyObject set_pos(IRubyObject pos) { check(); int i = RubyNumeric.num2int(pos); int size = str.getByteList().getRealSize(); if (i < 0) i += size; if (i < 0 || i > size) throw getRuntime().newRangeError("index out of range."); this.pos = i; return RubyFixnum.newFixnum(getRuntime(), i); } @JRubyMethod(name = "charpos") public IRubyObject charpos(ThreadContext context) { Ruby runtime = context.runtime; RubyString sub = (RubyString)Helpers.invoke(context, str, "byteslice", runtime.newFixnum(0), runtime.newFixnum(pos)); return runtime.newFixnum(sub.strLength()); } private IRubyObject extractRange(Ruby runtime, int beg, int end) { int size = str.getByteList().getRealSize(); if (beg > size) return runtime.getNil(); if (end > size) end = size; return str.makeSharedString(runtime, beg, end - beg); } private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { assert len >= 0; int size = str.getByteList().getRealSize(); if (beg > size) return runtime.getNil(); if (beg + len > size) len = size - beg; return str.makeSharedString(runtime, beg, len); } ThreadLocal<Matcher> currentMatcher = new ThreadLocal<>(); RubyThread.Task<RubyStringScanner, Integer> task = new RubyThread.Task<RubyStringScanner, Integer>() { @Override public Integer run(ThreadContext context, RubyStringScanner rubyStringScanner) throws InterruptedException { ByteList value = str.getByteList(); return currentMatcher.get().matchInterruptible(value.begin() + pos, value.begin() + value.realSize(), Option.NONE); } @Override public void wakeup(RubyThread thread, RubyStringScanner rubyStringScanner) { thread.getNativeThread().interrupt(); } }; private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succptr, boolean getstr, boolean headonly) { final Ruby runtime = getRuntime(); if (!(regex instanceof RubyRegexp)) throw runtime.newTypeError("wrong argument type " + regex.getMetaClass() + " (expected Regexp)"); check(); pattern = ((RubyRegexp)regex).preparePattern(str); clearMatched(); int rest = str.getByteList().getRealSize() - pos; if (rest < 0) return runtime.getNil(); ByteList value = str.getByteList(); int beg = value.getBegin() + pos; int range = value.getBegin() + value.getRealSize(); Matcher matcher = pattern.matcher(value.getUnsafeBytes(), beg, range); final int ret; if (headonly) { if (runtime.getInstanceConfig().isInterruptibleRegexps()) { currentMatcher.set(matcher); try { ret = runtime.getCurrentContext().getThread().executeTask(context, this, task); } catch (InterruptedException ie) { throw runtime.newInterruptedRegexpError("Regexp Interrupted"); } } else { ret = matcher.match(beg, range, Option.NONE); } } else { ret = RubyRegexp.matcherSearch(context, matcher, beg, range, Option.NONE); } regs = matcher.getRegion(); if (regs == null) { this.beg = matcher.getBegin(); this.end = matcher.getEnd(); } else { this.beg = regs.beg[0]; this.end = regs.end[0]; } if (ret < 0) return context.nil; setMatched(); lastPos = pos; if (succptr) pos += end; return getstr ? extractBegLen(runtime, lastPos, end) : RubyFixnum.newFixnum(runtime, end); } @JRubyMethod(name = "scan", required = 1) public IRubyObject scan(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, true, true); } @JRubyMethod(name = "match?", required = 1) public IRubyObject match_p(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, false, true); } @JRubyMethod(name = "skip", required = 1) public IRubyObject skip(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, false, true); } @JRubyMethod(name = "check", required = 1) public IRubyObject check(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, true, true); } @JRubyMethod(name = "scan_full", required = 3) public IRubyObject scan_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { return scan(context, regex, s.isTrue(), f.isTrue(), true); } @JRubyMethod(name = "scan_until", required = 1) public IRubyObject scan_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, true, false); } @JRubyMethod(name = "exist?", required = 1) public IRubyObject exist_p(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, false, false); } @JRubyMethod(name = "skip_until", required = 1) public IRubyObject skip_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, false, false); } @JRubyMethod(name = "check_until", required = 1) public IRubyObject check_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, true, false); } @JRubyMethod(name = "search_full", required = 3) public IRubyObject search_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { return scan(context, regex, s.isTrue(), f.isTrue(), false); } private void adjustRegisters() { beg = 0; end = pos - lastPos; regs = null; } public IRubyObject getch(ThreadContext context) { return getch19(context); } @JRubyMethod(name = "getch") public IRubyObject getch19(ThreadContext context) { return getchCommon(context, true); } public IRubyObject getchCommon(ThreadContext context, boolean is1_9) { check(); clearMatched(); Ruby runtime = context.runtime; ByteList value = str.getByteList(); if (pos >= value.getRealSize()) return context.nil; int len; if (is1_9) { Encoding enc = str.getEncoding(); len = enc.isSingleByte() ? 1 : StringSupport.length(enc, value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); } else { Encoding enc = runtime.getKCode().getEncoding(); len = enc.isSingleByte() ? 1 : enc.length(value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); } if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; lastPos = pos; pos += len; setMatched(); adjustRegisters(); return extractRange(runtime, lastPos + beg, lastPos + end); } @JRubyMethod(name = "get_byte") public IRubyObject get_byte(ThreadContext context) { check(); clearMatched(); if (pos >= str.getByteList().getRealSize()) return context.nil; lastPos = pos; pos++; setMatched(); adjustRegisters(); return extractRange(context.runtime, lastPos + beg, lastPos + end); } @JRubyMethod(name = "getbyte") public IRubyObject getbyte(ThreadContext context) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#getbyte is obsolete; use #get_byte instead"); } return get_byte(context); } @JRubyMethod(name = "peek", required = 1) public IRubyObject peek(ThreadContext context, IRubyObject length) { check(); int len = RubyNumeric.num2int(length); if (len < 0) { throw context.runtime.newArgumentError("negative string size (or size too big)"); } ByteList value = str.getByteList(); if (pos >= value.getRealSize()) return RubyString.newEmptyString(context.runtime).infectBy(str); if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; return extractBegLen(context.runtime, pos, len); } @JRubyMethod(name = "peep", required = 1) public IRubyObject peep(ThreadContext context, IRubyObject length) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning( ID.DEPRECATED_METHOD, "StringScanner#peep is obsolete; use #peek instead"); } return peek(context, length); } @JRubyMethod(name = "unscan") public IRubyObject unscan() { check(); Ruby runtime = getRuntime(); if (!isMatched()) { RubyClass errorClass = runtime.getClass("StringScanner").getClass("Error"); throw RaiseException.from(runtime, errorClass, "unscan failed: previous match had failed"); } pos = lastPos; clearMatched(); return this; } @JRubyMethod(name = "beginning_of_line?", alias = "bol?") public IRubyObject bol_p() { check(); Ruby runtime = getRuntime(); ByteList value = str.getByteList(); if (pos > value.getRealSize()) return runtime.getNil(); if (pos == 0) return runtime.getTrue(); return value.getUnsafeBytes()[(value.getBegin() + pos) - 1] == (byte)'\n' ? runtime.getTrue() : runtime.getFalse(); } @JRubyMethod(name = "eos?") public RubyBoolean eos_p(ThreadContext context) { check(); return pos >= str.getByteList().getRealSize() ? context.tru : context.fals; } @JRubyMethod(name = "empty?") public RubyBoolean empty_p(ThreadContext context) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#empty? is obsolete; use #eos? instead"); } return eos_p(context); } @JRubyMethod(name = "rest?") public RubyBoolean rest_p(ThreadContext context) { check(); return pos >= str.getByteList().getRealSize() ? context.fals : context.tru; } @JRubyMethod(name = "matched?") public RubyBoolean matched_p(ThreadContext context) { check(); return isMatched() ? context.tru : context.fals; } @JRubyMethod(name = "matched") public IRubyObject matched(ThreadContext context) { check(); if (!isMatched()) return context.nil; return extractRange(context.runtime, lastPos + beg, lastPos + end); } @JRubyMethod(name = "matched_size") public IRubyObject matched_size() { check(); if (!isMatched()) return getRuntime().getNil(); return RubyFixnum.newFixnum(getRuntime(), end - beg); } @JRubyMethod(name = "matchedsize") public IRubyObject matchedsize(ThreadContext context) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#matchedsize is obsolete; use #matched_size instead"); } return matched_size(); } @JRubyMethod(name = "[]", required = 1) public IRubyObject op_aref(ThreadContext context, IRubyObject idx) { Ruby runtime = context.runtime; check(); if (!isMatched()) { return context.nil; } if (idx instanceof RubySymbol || idx instanceof RubyString) { if (pattern == null) return context.nil; } int i = RubyMatchData.backrefNumber(runtime, pattern, regs, idx); int numRegs = regs == null ? 1 : regs.numRegs; if (i < 0) i += numRegs; if (i < 0 || i >= numRegs) { return context.nil; } if (regs == null) { assert i == 0; if (beg == -1) return context.nil; return extractRange(runtime, lastPos + beg, lastPos + end); } else { if (regs.beg[i] == -1) return context.nil; return extractRange(context.runtime, lastPos + regs.beg[i], lastPos + regs.end[i]); } } @JRubyMethod(name = "pre_match") public IRubyObject pre_match(ThreadContext context) { check(); if (!isMatched()) { return context.nil; } return extractRange(context.runtime, 0, lastPos + beg); } @JRubyMethod(name = "post_match") public IRubyObject post_match(ThreadContext context) { check(); if (!isMatched()) { return context.nil; } return extractRange(context.runtime, lastPos + end, str.getByteList().getRealSize()); } @JRubyMethod(name = "rest") public IRubyObject rest(ThreadContext context) { check(); ByteList value = str.getByteList(); if (pos >= value.getRealSize()) { return RubyString.newEmptyString(context.runtime).infectBy(str); } return extractRange(context.runtime, pos, value.getRealSize()); } @JRubyMethod(name = "rest_size") public RubyFixnum rest_size() { check(); ByteList value = str.getByteList(); if (pos >= value.getRealSize()) return RubyFixnum.zero(getRuntime()); return RubyFixnum.newFixnum(getRuntime(), value.getRealSize() - pos); } @JRubyMethod(name = "restsize") public RubyFixnum restsize(ThreadContext context) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#restsize is obsolete; use #rest_size instead"); } return rest_size(); } @JRubyMethod(name = "inspect") @Override public IRubyObject inspect() { if (str == null) return inspect("(uninitialized)"); if (pos >= str.getByteList().getRealSize()) return inspect("fin"); if (pos == 0) return inspect(pos + "/" + str.getByteList().getRealSize() + " @ " + inspect2()); return inspect(pos + "/" + str.getByteList().getRealSize() + " " + inspect1() + " @ " + inspect2()); } private IRubyObject inspect(String msg) { RubyString result = getRuntime().newString("#<" + getMetaClass() + " " + msg + ">"); if (str != null) result.infectBy(str); return result; } private static final int INSPECT_LENGTH = 5; private static final byte[] DOT_BYTES = "...".getBytes(); private IRubyObject inspect1() { final Ruby runtime = getRuntime(); if (pos == 0) return RubyString.newEmptyString(runtime); if (pos > INSPECT_LENGTH) { return RubyString.newStringNoCopy(runtime, DOT_BYTES). append(str.substr(runtime, pos - INSPECT_LENGTH, INSPECT_LENGTH)).inspect(); } return str.substr(runtime, 0, pos).inspect(); } private IRubyObject inspect2() { final Ruby runtime = getRuntime(); if (pos >= str.getByteList().getRealSize()) return RubyString.newEmptyString(runtime); int len = str.getByteList().getRealSize() - pos; if (len > INSPECT_LENGTH) { return ((RubyString) str.substr(runtime, pos, INSPECT_LENGTH)).cat(DOT_BYTES).inspect(); } return str.substr(runtime, pos, len).inspect(); } @JRubyMethod(name = "must_C_version", meta = true) public static IRubyObject mustCversion(IRubyObject recv) { return recv; } @JRubyMethod(name = "size") public IRubyObject size(ThreadContext context) { if (!isMatched()) return context.nil; return context.runtime.newFixnum(regs.numRegs); } @JRubyMethod(name = "captures") public IRubyObject captures(ThreadContext context) { int i, numRegs; RubyArray newAry; if (!isMatched()) return context.nil; Ruby runtime = context.runtime; numRegs = regs.numRegs; newAry = RubyArray.newArray(runtime, numRegs); for (i = 1; i < numRegs; i++) { IRubyObject str = extractRange(runtime, lastPos + regs.beg[i], lastPos + regs.end[i]); newAry.push(str); } return newAry; } @JRubyMethod(name = "values_at", rest = true) public IRubyObject values_at(ThreadContext context, IRubyObject[] args) { int i; RubyArray newAry; if (!isMatched()) return context.nil; Ruby runtime = context.runtime; newAry = RubyArray.newArray(runtime, args.length); for (i = 0; i < args.length; i++) { newAry.push(op_aref(context, args[i])); } return newAry; } }