package org.jruby.util;
import static org.jcodings.Encoding.CHAR_INVALID;
import static org.jruby.RubyEnumerator.enumeratorize;
import org.jcodings.Config;
import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.constants.CharacterType;
import org.jcodings.exception.EncodingError;
import org.jcodings.exception.EncodingException;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.util.IntHash;
import org.joni.Matcher;
import org.jruby.ObjectFlags;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyEncoding;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.ast.util.ArgsUtil;
import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.collections.IntHashMap;
import org.jruby.util.io.EncodingUtils;
import sun.misc.Unsafe;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.jruby.RubyString.scanForCodeRange;
public final class StringSupport {
/** Alias of {@link ObjectFlags#CR_7BIT_F}. */
public static final int CR_7BIT_F = ObjectFlags.CR_7BIT_F;
/** Alias of {@link ObjectFlags#CR_VALID_F}. */
public static final int CR_VALID_F = ObjectFlags.CR_VALID_F;
/** Code range value: not yet determined (no bits set). */
public static final int CR_UNKNOWN = 0;
/** Code range value reported for content with no non-ASCII bytes; literal copy of {@link #CR_7BIT_F}. */
public static final int CR_7BIT = 16;
/** Code range value reported for well-formed content; literal copy of {@link #CR_VALID_F}. */
public static final int CR_VALID = 32;
// The literals above are spelled out as numbers; when assertions are enabled
// this verifies they still agree with the flag values in ObjectFlags.
static {
assert CR_7BIT == CR_7BIT_F : "CR_7BIT = " + CR_7BIT + " but should be " + CR_7BIT_F;
assert CR_VALID == CR_VALID_F : "CR_VALID = " + CR_VALID + " but should be " + CR_VALID_F;
}
/** Code range value for malformed content: both code range bits set. */
public static final int CR_BROKEN = CR_7BIT | CR_VALID;
/** Mask covering both code range bits (same numeric value as {@link #CR_BROKEN}). */
public static final int CR_MASK = CR_7BIT | CR_VALID;
/**
 * Base offset of {@code byte[]} contents as reported by {@code Unsafe}, or 0
 * when the Unsafe holder is {@code null}. {@link #utf8Length(byte[], int, int)}
 * only takes its Unsafe path when this value is greater than zero.
 */
static final int ARRAY_BYTE_BASE_OFFSET;
static {
final Unsafe unsafe = org.jruby.util.unsafe.UnsafeHolder.U;
ARRAY_BYTE_BASE_OFFSET = unsafe != null ? unsafe.arrayBaseOffset(byte[].class) : 0;
}
/**
 * Number of directly indexed entries in the boolean tables used by the tr
 * helpers; those tables also use index {@code TRANS_SIZE} itself as an
 * extra slot (see {@code trSetupTable} / {@code trFind}).
 */
public static final int TRANS_SIZE = 256;
/** Shared zero-length {@link ByteList} array. */
public static final ByteList[] EMPTY_BYTELIST_ARRAY = new ByteList[0];
/** Shared zero-length {@link String} array. */
public static final String[] EMPTY_STRING_ARRAY = new String[0];
/**
 * Splits {@code str} at every {@code sep} with no limit on the number of
 * pieces; same as {@code split(str, sep, 0)}.
 */
public static List<String> split(final String str, final char sep) {
    final int noLimit = 0;
    return split(str, sep, noLimit);
}

/**
 * Splits {@code str} at occurrences of {@code sep}.
 *
 * @param str the string to split; an empty string yields a singleton list holding it
 * @param sep the separator character
 * @param lim when positive, the maximum number of pieces (the last piece
 *            keeps the unsplit remainder, and a trailing empty piece is kept);
 *            when zero or negative, unlimited and a trailing empty piece is dropped
 * @return the pieces in order
 */
public static List<String> split(final String str, final char sep, final int lim) {
    final int length = str.length();
    if (length == 0) {
        return Collections.singletonList(str);
    }

    final ArrayList<String> parts = new ArrayList<>(lim > 0 ? lim : 8);
    int from = 0;
    int pieces = 0;
    for (int at = str.indexOf(sep, from); at != -1; at = str.indexOf(sep, from)) {
        pieces++;
        if (pieces == lim) {
            // limit reached: the remainder goes in unsplit
            parts.add(str.substring(from));
            return parts;
        }
        parts.add(str.substring(from, at));
        from = at + 1;
    }

    final boolean hasTail = from < length;
    final boolean keepEmptyTail = from == length && lim > 0;
    if (hasTail || keepEmptyTail) {
        parts.add(str.substring(from));
    }
    return parts;
}
/**
 * Tells whether {@code str} begins with {@code prefix}, comparing char by char.
 *
 * @return {@code false} when {@code prefix} is longer than {@code str}
 */
public static boolean startsWith(final CharSequence str, final String prefix) {
    final int prefixLength = prefix.length();
    if (str.length() < prefixLength) {
        return false;
    }
    for (int idx = 0; idx < prefixLength; idx++) {
        if (prefix.charAt(idx) != str.charAt(idx)) {
            return false;
        }
    }
    return true;
}

/** Tells whether the first char of {@code str} is {@code c} (false for an empty sequence). */
public static boolean startsWith(final CharSequence str, final char c) {
    if (str.length() < 1) {
        return false;
    }
    return str.charAt(0) == c;
}

/** Tells whether {@code str} begins with the two chars {@code c1}, {@code c2} in that order. */
public static boolean startsWith(final CharSequence str, final char c1, final char c2) {
    if (str.length() < 2) {
        return false;
    }
    return str.charAt(0) == c1 && str.charAt(1) == c2;
}
/**
 * Searches backwards from {@code index} (inclusive) for {@code c}.
 *
 * @return the index of the match, or -1 when none is found at or before {@code index}
 */
public static int lastIndexOf(final CharSequence str, final char c, int index) {
    for (int i = index; i >= 0; i--) {
        if (c == str.charAt(i)) {
            return i;
        }
    }
    return -1;
}
/** Tells whether {@code str} consists of exactly the one char {@code chr}. */
public static boolean contentEquals(final CharSequence str, final int chr) {
    if (str.length() != 1) {
        return false;
    }
    return str.charAt(0) == chr;
}

/** Tells whether {@code str} consists of exactly the two chars {@code chr1}, {@code chr2}. */
public static boolean contentEquals(final CharSequence str, final int chr1, final int chr2) {
    if (str.length() != 2) {
        return false;
    }
    return str.charAt(0) == chr1 && str.charAt(1) == chr2;
}
/**
 * Joins two char sequences.
 *
 * @return a {@link StringBuilder} holding {@code str1} followed by {@code str2}
 */
public static CharSequence concat(final CharSequence str1, final CharSequence str2) {
    final StringBuilder joined = new StringBuilder(str1.length() + str2.length());
    joined.append(str1);
    joined.append(str2);
    return joined;
}

/** Joins two strings into a new {@link String} using a pre-sized builder. */
public static String concat(final String str1, final String str2) {
    final StringBuilder joined = new StringBuilder(str1.length() + str2.length());
    joined.append(str1);
    joined.append(str2);
    return joined.toString();
}
/**
 * Removes every occurrence of {@code c} from {@code str}.
 *
 * <p>The previous implementation stopped copying at the last occurrence of
 * {@code c}, dropping everything after it, and relied on a helper for the
 * segments in between. This version copies all retained characters itself.
 *
 * @param str the source string
 * @param c   the character to remove
 * @return {@code str} itself when it does not contain {@code c}; otherwise a
 *         new string with all occurrences removed
 */
public static String delete(final String str, final char c) {
    final int first = str.indexOf(c);
    if (first == -1) {
        return str; // nothing to remove, hand back the same instance
    }

    final int len = str.length();
    // at least one char is dropped, so len - 1 is always enough room
    final char[] ary = new char[len - 1];
    str.getChars(0, first, ary, 0);
    int end = first;
    for (int i = first + 1; i < len; i++) {
        final char ch = str.charAt(i);
        if (ch != c) {
            ary[end++] = ch;
        }
    }
    return new String(ary, 0, end);
}
/**
 * Replaces the first occurrence of {@code sub} in {@code str} with {@code repl}.
 *
 * @return {@code str} itself when nothing is replaced, otherwise a builder with the result
 */
public static CharSequence replaceFirst(final String str, final String sub, final String repl) {
    final int onlyOnce = 1;
    return replaceImpl(str, sub, repl, onlyOnce, false);
}

/**
 * Replaces every occurrence of {@code sub} in {@code str} with {@code repl}.
 *
 * @return {@code str} itself when nothing is replaced, otherwise a builder with the result
 */
public static CharSequence replaceAll(final String str, final String sub, final String repl) {
    final int unlimited = -1;
    return replaceImpl(str, sub, repl, unlimited, false);
}

/**
 * Shared literal (non-regex) replacement routine.
 *
 * @param max        maximum number of replacements; a negative value means no limit
 * @param ignoreCase when true, matching is done on lower-cased copies of
 *                   {@code str} and {@code sub}
 * @return {@code str} unchanged when either input is empty or {@code sub}
 *         does not occur; otherwise a {@link StringBuilder} with the result
 */
private static CharSequence replaceImpl(final String str, String sub, final String repl, int max, final boolean ignoreCase) {
    if (str.length() == 0 || sub.length() == 0) {
        return str;
    }

    final String haystack;
    if (ignoreCase) {
        haystack = str.toLowerCase();
        sub = sub.toLowerCase();
    } else {
        haystack = str;
    }

    int hit = haystack.indexOf(sub, 0);
    if (hit == -1) {
        return str;
    }

    final int subLength = sub.length();
    // Capacity guess: growth per replacement times an assumed number of hits.
    final int growthPerHit = Math.max(repl.length() - subLength, 0);
    final int assumedHits;
    if (max < 0) {
        assumedHits = 16;
    } else if (max > 64) {
        assumedHits = 64;
    } else {
        assumedHits = max;
    }
    final StringBuilder out = new StringBuilder(str.length() + growthPerHit * assumedHits);

    int copiedUpTo = 0;
    do {
        out.append(str, copiedUpTo, hit).append(repl);
        copiedUpTo = hit + subLength;
        if (--max == 0) {
            break;
        }
        hit = haystack.indexOf(sub, copiedUpTo);
    } while (hit != -1);

    out.append(str, copiedUpTo, str.length());
    return out;
}
/**
 * Copies {@code slen} chars of {@code str}, starting at {@code soff}, into
 * {@code dest} starting at {@code doff}. Lengths 0 to 3 are unrolled.
 *
 * <p>The general case previously looped while {@code s < slen}, treating the
 * length as an end offset; with a non-zero {@code soff} that copied too few
 * chars or none. The loop now ends at {@code soff + slen}.
 *
 * @return the destination offset just past the last char written
 */
private static int copy(final String str, final int soff, final int slen, final char[] dest, int doff) {
    switch (slen) {
        case 0:
            break;
        case 1:
            dest[doff++] = str.charAt(soff);
            break;
        case 2:
            dest[doff++] = str.charAt(soff);
            dest[doff++] = str.charAt(soff + 1);
            break;
        case 3:
            dest[doff++] = str.charAt(soff);
            dest[doff++] = str.charAt(soff + 1);
            dest[doff++] = str.charAt(soff + 2);
            break;
        default: {
            // exclusive end offset in str; a negative slen copies nothing
            final int send = soff + slen;
            for (int s = soff; s < send; s++) {
                dest[doff++] = str.charAt(s);
            }
        }
    }
    return doff;
}
/**
 * Gives a short label for a code range value.
 *
 * @return "unknown", "7bit", "valid" or "broken" for the matching CR_*
 *         constant, and "???" for any other value
 */
public static String codeRangeAsString(int codeRange) {
    if (codeRange == CR_UNKNOWN) {
        return "unknown";
    }
    if (codeRange == CR_7BIT) {
        return "7bit";
    }
    if (codeRange == CR_VALID) {
        return "valid";
    }
    if (codeRange == CR_BROKEN) {
        return "broken";
    }
    return "???";
}
/**
 * Returns the encoding's own length for the character at {@code p},
 * exactly as reported by {@code enc.length}, with no validation or clamping.
 */
public static int encFastMBCLen(byte[] bytes, int p, int e, Encoding enc) {
    final int reported = enc.length(bytes, p, e);
    return reported;
}
/**
 * Returns a usable byte length for the character at {@code p}.
 *
 * <p>If the encoding reports a positive length that fits in the remaining
 * bytes, that length is returned. Otherwise the result is the encoding's
 * minimum length, capped at the number of remaining bytes.
 */
public static int length(Encoding enc, byte[]bytes, int p, int end) {
    final int available = end - p;
    final int n = enc.length(bytes, p, end);
    if (MBCLEN_CHARFOUND_P(n)) {
        final int found = MBCLEN_CHARFOUND_LEN(n);
        if (found <= available) {
            return found;
        }
    }
    final int min = enc.minLength();
    return Math.min(min, available);
}
/**
 * Returns the encoding-reported length of the character at {@code p}, or a
 * "need more" marker when the range is too short.
 *
 * @return {@code MBCLEN_NEEDMORE(1)} when the range is empty;
 *         {@code MBCLEN_NEEDMORE(missing)} when the reported length exceeds
 *         the remaining bytes; otherwise whatever {@code enc.length} returned
 */
public static int preciseLength(Encoding enc, byte[]bytes, int p, int end) {
    if (p >= end) {
        return MBCLEN_NEEDMORE(1);
    }
    final int available = end - p;
    final int n = enc.length(bytes, p, end);
    return n > available ? MBCLEN_NEEDMORE(n - available) : n;
}
/** True when {@code r} is a "need more bytes" marker, i.e. below -1. */
public static boolean MBCLEN_NEEDMORE_P(int r) {
    return -1 > r;
}

/** Encodes "need {@code n} more bytes" as the negative marker {@code -1 - n}. */
public static int MBCLEN_NEEDMORE(int n) {
    return -(n + 1);
}

/** Decodes a marker produced by {@link #MBCLEN_NEEDMORE(int)} back to the byte count. */
public static int MBCLEN_NEEDMORE_LEN(int r) {
    return -(r + 1);
}

/** True when {@code r} is exactly -1. */
public static boolean MBCLEN_INVALID_P(int r) {
    return -1 == r;
}

/** Returns the length carried by a "char found" result (the value itself). */
public static int MBCLEN_CHARFOUND_LEN(int r) {
    final int length = r;
    return length;
}

/** True when {@code r} is a positive length. */
public static boolean MBCLEN_CHARFOUND_P(int r) {
    return r > 0;
}

/** Builds a "char found" result for a length of {@code n} (the value itself). */
public static int CONSTRUCT_MBCLEN_CHARFOUND(int n) {
    final int result = n;
    return result;
}
/**
 * Finds the first byte in {@code [p, end)} that {@code Encoding.isAscii} rejects.
 *
 * @return the index of that byte, or -1 when every byte in the range is ASCII
 */
public static int searchNonAscii(byte[]bytes, int p, int end) {
    for (int i = p; i < end; i++) {
        if (!Encoding.isAscii(bytes[i])) {
            return i;
        }
    }
    return -1;
}

/** Same search over the live region of {@code bytes}; the result indexes the backing array. */
public static int searchNonAscii(ByteList bytes) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    return searchNonAscii(raw, begin, begin + bytes.getRealSize());
}
/**
 * Scans {@code len} bytes starting at {@code p} and classifies them.
 *
 * <p>For {@code ASCIIEncoding.INSTANCE} the result is {@code CR_7BIT} or
 * {@code CR_VALID} depending on whether a non-ASCII byte is present. Other
 * encodings go to the ASCII-compatible or non-ASCII-compatible scanner.
 *
 * @return one of {@code CR_7BIT}, {@code CR_VALID}, {@code CR_BROKEN}
 */
public static int codeRangeScan(Encoding enc, byte[]bytes, int p, int len) {
    if (enc == ASCIIEncoding.INSTANCE) {
        final boolean allAscii = searchNonAscii(bytes, p, p + len) == -1;
        return allAscii ? CR_7BIT : CR_VALID;
    }
    return enc.isAsciiCompatible()
            ? codeRangeScanAsciiCompatible(enc, bytes, p, len)
            : codeRangeScanNonAsciiCompatible(enc, bytes, p, len);
}
/**
 * Code range scan for ASCII-compatible encodings: ASCII runs are skipped
 * with {@code searchNonAscii}, and each non-ASCII character is checked
 * with {@code preciseLength}.
 *
 * @return {@code CR_7BIT} when no non-ASCII byte exists, {@code CR_BROKEN}
 *         on the first character with a non-positive length (or when the
 *         scan overshoots the end), otherwise {@code CR_VALID}
 */
private static int codeRangeScanAsciiCompatible(Encoding enc, byte[]bytes, int p, int len) {
    final int end = p + len;
    int pos = searchNonAscii(bytes, p, end);
    if (pos == -1) {
        return CR_7BIT;
    }
    while (pos < end) {
        final int cl = preciseLength(enc, bytes, pos, end);
        if (cl <= 0) {
            return CR_BROKEN;
        }
        pos += cl;
        if (pos >= end) {
            break;
        }
        pos = searchNonAscii(bytes, pos, end);
        if (pos == -1) {
            return CR_VALID;
        }
    }
    return pos > end ? CR_BROKEN : CR_VALID;
}
/**
 * Code range scan for encodings that are not ASCII-compatible: every
 * character is checked with {@code preciseLength}.
 *
 * @return {@code CR_BROKEN} on a non-positive length or when the scan
 *         overshoots the end, otherwise {@code CR_VALID}
 */
private static int codeRangeScanNonAsciiCompatible(Encoding enc, byte[]bytes, int p, int len) {
    final int end = p + len;
    int pos = p;
    while (pos < end) {
        final int cl = preciseLength(enc, bytes, pos, end);
        if (cl <= 0) {
            return CR_BROKEN;
        }
        pos += cl;
    }
    if (pos > end) {
        return CR_BROKEN;
    }
    return CR_VALID;
}
/** Scans the live region of {@code bytes} for its code range under {@code enc}. */
public static int codeRangeScan(Encoding enc, ByteList bytes) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    final int size = bytes.getRealSize();
    return codeRangeScan(enc, raw, begin, size);
}
/**
 * Code range scan that also reports how many bytes were consumed, so a
 * caller can resume after a stop.
 *
 * @param s   start of the range
 * @param end end of the range (exclusive)
 * @param cr  code range known so far; {@code CR_BROKEN} short-circuits the scan
 * @return {@code pack(bytesScanned, codeRange)}. When a character has a
 *         non-positive length the scan stops there with {@code CR_BROKEN}
 *         if the length equals {@code CHAR_INVALID}, else {@code CR_UNKNOWN}.
 */
public static long codeRangeScanRestartable(Encoding enc, byte[]bytes, int s, int end, int cr) {
    if (cr == CR_BROKEN) {
        return pack(end - s, cr);
    }

    if (enc == ASCIIEncoding.INSTANCE) {
        final boolean stays7bit = searchNonAscii(bytes, s, end) == -1 && cr != CR_VALID;
        return pack(end - s, stays7bit ? CR_7BIT : CR_VALID);
    }

    final boolean asciiCompat = enc.isAsciiCompatible();
    int p = s;
    if (asciiCompat) {
        p = searchNonAscii(bytes, p, end);
        if (p == -1) {
            return pack(end - s, cr != CR_VALID ? CR_7BIT : cr);
        }
    }

    while (p < end) {
        final int cl = preciseLength(enc, bytes, p, end);
        if (cl <= 0) {
            return pack(p - s, cl == CHAR_INVALID ? CR_BROKEN : CR_UNKNOWN);
        }
        p += cl;
        if (asciiCompat && p < end) {
            // skip the next ASCII run in one go
            p = searchNonAscii(bytes, p, end);
            if (p == -1) {
                return pack(end - s, CR_VALID);
            }
        }
    }
    return pack(p - s, p > end ? CR_BROKEN : CR_VALID);
}
/** Bit 7 of each of the eight bytes in a long. */
private static final long NONASCII_MASK = 0x8080808080808080L;
/**
 * Counts, among the eight bytes packed in {@code d}, those that are NOT of
 * the form {@code 10xxxxxx}.
 *
 * @param d eight bytes packed into one long
 * @return a count in the range 0..8
 */
private static int countUtf8LeadBytes(long d) {
// Per byte: bit 6 becomes (bit6 | !bit7), i.e. 0 only for 10xxxxxx bytes.
d |= ~(d >>> 1);
// Move that bit to bit 0 of each byte and keep only it (mask 0x0101...01).
d >>>= 6;
d &= NONASCII_MASK >>> 7;
// Horizontal add of the eight per-byte flags into the low byte.
d += (d >>> 8);
d += (d >>> 16);
d += (d >>> 32);
// The sum is at most 8, so four bits are enough.
return (int)(d & 0xf);
}
/** Number of bytes read per step in the word-at-a-time path. */
private static final int LONG_SIZE = 8;
/** Mask of the low bits of an index within a {@link #LONG_SIZE}-byte group. */
private static final int LOWBITS = LONG_SIZE - 1;
/**
 * Counts the bytes in {@code [p, end)} that are not of the form
 * {@code 10xxxxxx}; no validation is performed.
 *
 * @param bytes backing array
 * @param p     start index (inclusive)
 * @param end   end index (exclusive)
 * @return the number of such bytes
 */
@SuppressWarnings("deprecation")
public static int utf8Length(byte[] bytes, int p, int end) {
int len = 0;
// Word-at-a-time path: only when an Unsafe base offset was obtained and
// the range is longer than two longs.
if (ARRAY_BYTE_BASE_OFFSET > 0) {
if (end - p > LONG_SIZE * 2) {
// Count byte by byte until p is rounded up to a multiple of LONG_SIZE.
int ep = ~LOWBITS & (p + LOWBITS);
while (p < ep) {
if ((bytes[p++] & 0xc0 ) != 0x80) len++;
}
final Unsafe unsafe = org.jruby.util.unsafe.UnsafeHolder.U;
// Then eight bytes per step up to end rounded down to a multiple of LONG_SIZE.
int eend = ~LOWBITS & end;
while (p < eend) {
len += countUtf8LeadBytes(unsafe.getLong(bytes, (long) (ARRAY_BYTE_BASE_OFFSET + p)));
p += LONG_SIZE;
}
}
}
// Remaining bytes (or the whole range when the fast path was skipped).
while (p < end) {
if ((bytes[p++] & 0xc0 ) != 0x80) len++;
}
return len;
}
/** Applies {@link #utf8Length(byte[], int, int)} to the live region of {@code bytes}. */
public static int utf8Length(ByteList bytes) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    return utf8Length(raw, begin, begin + bytes.getRealSize());
}
/** Character count of {@code [p, end)} with no prior code range knowledge ({@code CR_UNKNOWN}). */
public static int strLength(Encoding enc, byte[]bytes, int p, int end) {
    final int unknown = CR_UNKNOWN;
    return strLength(enc, bytes, p, end, unknown);
}

/**
 * Counts the characters in {@code [p, e)}.
 *
 * <p>Fixed-width encodings are computed arithmetically (rounding up).
 * ASCII-compatible encodings skip ASCII runs with {@code searchNonAscii};
 * per-character length comes from {@code encFastMBCLen} when {@code cr} is
 * {@code CR_7BIT} or {@code CR_VALID}, else from {@code length}. Other
 * encodings step with {@code length}.
 *
 * @param cr code range of the content, or {@code CR_UNKNOWN}
 */
public static int strLength(Encoding enc, byte[]bytes, int p, int e, int cr) {
    if (enc.isFixedWidth()) {
        final int width = enc.minLength();
        return (e - p + width - 1) / width;
    }

    int count = 0;
    if (!enc.isAsciiCompatible()) {
        while (p < e) {
            p += length(enc, bytes, p, e);
            count++;
        }
        return count;
    }

    final boolean trusted = cr == CR_7BIT || cr == CR_VALID;
    while (p < e) {
        if (Encoding.isAscii(bytes[p])) {
            final int nonAscii = searchNonAscii(bytes, p, e);
            if (nonAscii == -1) {
                return count + (e - p);
            }
            count += nonAscii - p;
            p = nonAscii;
        }
        p += trusted ? encFastMBCLen(bytes, p, e, enc) : length(enc, bytes, p, e);
        count++;
    }
    return count;
}
/** Character count of the live region of {@code bytes} under its own encoding. */
public static int strLength(ByteList bytes) {
    final Encoding enc = bytes.getEncoding();
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    return strLength(enc, raw, begin, begin + bytes.getRealSize());
}
/**
 * Computes the character count of {@code [p, end)} and, for variable-width
 * encodings, its code range, packed as by {@link #pack(int, int)}.
 *
 * <p>For fixed-width encodings only the rounded-up count is returned; no
 * code range is packed into the value.
 */
public static long strLengthWithCodeRange(Encoding enc, byte[]bytes, int p, int end) {
    if (enc.isFixedWidth()) {
        final int width = enc.minLength();
        return (end - p + width - 1) / width;
    }
    if (enc.isAsciiCompatible()) {
        return strLengthWithCodeRangeAsciiCompatible(enc, bytes, p, end);
    }
    return strLengthWithCodeRangeNonAsciiCompatible(enc, bytes, p, end);
}
/**
 * Length and code range for ASCII-compatible encodings.
 *
 * <p>ASCII runs are counted in bulk. A character with a positive
 * {@code preciseLength} sets the {@code CR_VALID} bit; otherwise the code
 * range becomes {@code CR_BROKEN} and the scan advances one byte, counting
 * it as one character.
 *
 * @return {@code pack(charCount, codeRange)}, with {@code CR_7BIT} when no
 *         non-ASCII character was seen
 */
public static long strLengthWithCodeRangeAsciiCompatible(Encoding enc, byte[]bytes, int p, int end) {
    int codeRange = 0;
    int chars = 0;
    while (p < end) {
        if (Encoding.isAscii(bytes[p])) {
            final int nonAscii = searchNonAscii(bytes, p, end);
            if (nonAscii == -1) {
                // the rest is pure ASCII
                chars += end - p;
                break;
            }
            chars += nonAscii - p;
            p = nonAscii;
        }
        final int cl = preciseLength(enc, bytes, p, end);
        if (cl > 0) {
            codeRange |= CR_VALID;
            p += cl;
        } else {
            codeRange = CR_BROKEN;
            p++;
        }
        chars++;
    }
    return pack(chars, codeRange == 0 ? CR_7BIT : codeRange);
}
/**
 * Length and code range for encodings that are not ASCII-compatible.
 *
 * <p>Every character with a positive {@code preciseLength} sets the
 * {@code CR_VALID} bit; otherwise the code range becomes {@code CR_BROKEN}
 * and the scan advances a single byte.
 *
 * @return {@code pack(charCount, codeRange)}, with {@code CR_7BIT} for an empty range
 */
public static long strLengthWithCodeRangeNonAsciiCompatible(Encoding enc, byte[]bytes, int p, int end) {
    int codeRange = 0;
    int chars = 0;
    while (p < end) {
        final int cl = preciseLength(enc, bytes, p, end);
        if (cl > 0) {
            codeRange |= CR_VALID;
            p += cl;
        } else {
            codeRange = CR_BROKEN;
            p++;
        }
        chars++;
    }
    return pack(chars, codeRange == 0 ? CR_7BIT : codeRange);
}
/** Packed length and code range of the live region of {@code bytes}, using its own encoding. */
public static long strLengthWithCodeRange(ByteList bytes) {
    final Encoding enc = bytes.getEncoding();
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    return strLengthWithCodeRange(enc, raw, begin, begin + bytes.getRealSize());
}

/** Packed length and code range of the live region of {@code bytes}, interpreted as {@code enc}. */
public static long strLengthWithCodeRange(ByteList bytes, Encoding enc) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int begin = bytes.getBegin();
    return strLengthWithCodeRange(enc, raw, begin, begin + bytes.getRealSize());
}
/**
 * Packs two ints into one long: {@code arg} shifted left by 31 bits, OR-ed
 * with {@code result} (which is sign-extended, so a negative {@code result}
 * sets all upper bits).
 */
public static long pack(int result, int arg) {
    final long upper = (long) arg << 31;
    return upper | result;
}

/** Extracts the low 31 bits of a packed value. */
public static int unpackResult(long len) {
    final int low = (int) len;
    return low & 0x7fffffff;
}

/** Extracts the part of a packed value above bit 30, truncated to an int. */
public static int unpackArg(long cr) {
    final long upper = cr >>> 31;
    return (int) upper;
}
/**
 * Decodes the code point of the character at {@code p}.
 *
 * @throws IllegalArgumentException "empty string" when the range is empty,
 *         or "invalid byte sequence in ..." when {@code preciseLength} is not positive
 */
public static int codePoint(Encoding enc, byte[] bytes, int p, int end) {
    if (p >= end) {
        throw new IllegalArgumentException("empty string");
    }
    final boolean wellFormed = preciseLength(enc, bytes, p, end) > 0;
    if (!wellFormed) {
        throw new IllegalArgumentException("invalid byte sequence in " + enc);
    }
    return enc.mbcToCode(bytes, p, end);
}

/**
 * Same as {@link #codePoint(Encoding, byte[], int, int)} but converts the
 * {@link IllegalArgumentException} into the runtime's argument error,
 * keeping the message.
 */
public static int codePoint(Ruby runtime, Encoding enc, byte[] bytes, int p, int end) {
    final int code;
    try {
        code = codePoint(enc, bytes, p, end);
    } catch (IllegalArgumentException e) {
        throw runtime.newArgumentError(e.getMessage());
    }
    return code;
}

/** Decodes the first character of {@code value}, using the encoding from {@code EncodingUtils.getEncoding}. */
public static int codePoint(final Ruby runtime, final ByteList value) {
    final Encoding enc = EncodingUtils.getEncoding(value);
    final byte[] raw = value.getUnsafeBytes();
    final int begin = value.getBegin();
    return codePoint(runtime, enc, raw, begin, begin + value.getRealSize());
}
/**
 * Returns the encoded byte length of code point {@code c} in {@code enc}.
 *
 * @throws EncodingException when the encoding reports a negative length
 */
public static int codeLength(Encoding enc, int c) {
    return checkCodepointError(enc.codeToMbcLength(c));
}

/**
 * Passes through a non-negative value; a negative value is treated as an
 * encoding error code and raised as an {@link EncodingException}.
 */
public static int checkCodepointError(int i) {
    if (i >= 0) {
        return i;
    }
    throw new EncodingException(EncodingError.fromCode(i));
}
/** Same as {@link #getAscii(Encoding, byte[], int, int, int)} with {@code len} of 0. */
public static long getAscii(Encoding enc, byte[]bytes, int p, int end) {
    final int noLength = 0;
    return getAscii(enc, bytes, p, end, noLength);
}

/**
 * Reads the character at {@code p} if it is ASCII.
 *
 * @param len 0 to leave the length part of the result as 0; any other
 *            value to have the consumed byte count packed in
 * @return {@code pack(-1, len)} when the range is empty, the character is
 *         malformed, or it is not ASCII; otherwise {@code pack(code, n)}
 *         where {@code n} is 0 or the number of bytes the character occupies
 */
public static long getAscii(Encoding enc, byte[]bytes, int p, int end, int len) {
    if (p >= end) {
        return pack(-1, len);
    }

    final int code;
    final int consumed;
    if (enc.isAsciiCompatible()) {
        code = bytes[p] & 0xff;
        consumed = 1;
    } else {
        consumed = preciseLength(enc, bytes, p, end);
        if (consumed <= 0) {
            return pack(-1, len);
        }
        code = enc.mbcToCode(bytes, p, end);
    }

    if (!Encoding.isAscii(code)) {
        return pack(-1, len);
    }
    return pack(code, len == 0 ? 0 : consumed);
}
/**
 * Decodes the character at {@code p} without throwing.
 *
 * @return the code point, or -1 when {@code preciseLength} is not positive
 */
public static int preciseCodePoint(Encoding enc, byte[]bytes, int p, int end) {
    final int cl = preciseLength(enc, bytes, p, end);
    if (cl <= 0) {
        return -1;
    }
    return enc.mbcToCode(bytes, p, end);
}
/**
 * Finds the byte index of the {@code nth} (0-based) byte in {@code [p, e)}
 * that is not of the form {@code 10xxxxxx}.
 *
 * @return that index, or {@code e} when the range holds fewer such bytes
 */
@SuppressWarnings("deprecation")
public static int utf8Nth(byte[]bytes, int p, int e, int nth) {
    int remaining = nth;
    for (; p < e; p++) {
        final boolean continuation = (bytes[p] & 0xc0) == 0x80;
        if (continuation) {
            continue;
        }
        if (remaining == 0) {
            break;
        }
        remaining--;
    }
    return p;
}
/** Byte index of the {@code n}-th character, deciding single-byte handling from {@code enc.isSingleByte()}. */
public static int nth(Encoding enc, byte[]bytes, int p, int end, int n) {
    final boolean singlebyte = enc.isSingleByte();
    return nth(enc, bytes, p, end, n, singlebyte);
}

/**
 * Computes the byte index of the {@code n}-th character counted from {@code p}.
 *
 * @param singlebyte when true, each character is taken to be one byte
 * @return -1 when the computed index is negative; otherwise the index, capped at {@code end}
 */
public static int nth(Encoding enc, byte[]bytes, int p, int end, int n, boolean singlebyte) {
    final int pos;
    if (singlebyte) {
        pos = p + n;
    } else if (enc.isFixedWidth()) {
        pos = p + n * enc.maxLength();
    } else if (enc.isAsciiCompatible()) {
        pos = nthAsciiCompatible(enc, bytes, p, end, n);
    } else {
        pos = nthNonAsciiCompatible(enc, bytes, p, end, n);
    }

    if (pos < 0) {
        return -1;
    }
    return Math.min(pos, end);
}
/**
 * {@code nth} for ASCII-compatible encodings: ASCII runs are skipped in
 * bulk, other characters are stepped over using {@code length}.
 *
 * @return the byte index of the {@code n}-th character, or {@code end}
 *         when the range holds fewer than {@code n} characters
 */
private static int nthAsciiCompatible(Encoding enc, byte[]bytes, int p, int end, int n) {
    while (p < end && n > 0) {
        // n characters need at least n bytes
        final int asciiLimit = p + n;
        if (asciiLimit > end) {
            return end;
        }
        if (Encoding.isAscii(bytes[p])) {
            final int nonAscii = searchNonAscii(bytes, p, asciiLimit);
            if (nonAscii == -1) {
                return asciiLimit;
            }
            n -= nonAscii - p;
            p = nonAscii;
        }
        p += length(enc, bytes, p, end);
        n--;
    }
    return n == 0 ? p : end;
}
/**
 * {@code nth} for encodings that are not ASCII-compatible: steps over
 * characters with {@code length} until {@code n} have been skipped or
 * {@code end} is reached.
 */
private static int nthNonAsciiCompatible(Encoding enc, byte[]bytes, int p, int end, int n) {
    int remaining = n;
    while (p < end) {
        if (remaining == 0) {
            break;
        }
        remaining--;
        p += length(enc, bytes, p, end);
    }
    return p;
}
/** Byte offset, relative to {@code p}, of the position found by {@code utf8Nth}. */
public static int utf8Offset(byte[]bytes, int p, int end, int n) {
    final int target = utf8Nth(bytes, p, end, n);
    final int stop = target == -1 ? end : target;
    return stop - p;
}

/** Byte offset, relative to {@code p}, of the {@code n}-th character; {@code end - p} when {@code nth} gives -1. */
public static int offset(Encoding enc, byte[]bytes, int p, int end, int n) {
    final int target = nth(enc, bytes, p, end, n);
    final int stop = target == -1 ? end : target;
    return stop - p;
}

/** Like {@link #offset(Encoding, byte[], int, int, int)} with explicit single-byte handling. */
public static int offset(Encoding enc, byte[]bytes, int p, int end, int n, boolean singlebyte) {
    final int target = nth(enc, bytes, p, end, n, singlebyte);
    final int stop = target == -1 ? end : target;
    return stop - p;
}

/** Byte offset of character index {@code pos} within {@code str}. */
public static int offset(RubyString str, int pos) {
    final ByteList value = str.getByteList();
    final Encoding enc = str.getEncoding();
    final byte[] raw = value.getUnsafeBytes();
    final int begin = value.getBegin();
    return offset(enc, raw, begin, begin + value.getRealSize(), pos);
}
/**
 * Lower-cases {@code c} through the ASCII table when it is ASCII; other
 * values are returned unchanged. {@code enc} is not consulted.
 */
@Deprecated
public static int toLower(Encoding enc, int c) {
    if (!Encoding.isAscii(c)) {
        return c;
    }
    return AsciiTables.ToLowerCaseTable[c];
}

/**
 * Upper-cases {@code c} through the ASCII table when it is ASCII; other
 * values are returned unchanged. {@code enc} is not consulted.
 */
@Deprecated
public static int toUpper(Encoding enc, int c) {
    if (!Encoding.isAscii(c)) {
        return c;
    }
    return AsciiTables.ToUpperCaseTable[c];
}
/**
 * Compares {@code len} bytes of two arrays, treating bytes as unsigned.
 *
 * @return 0 when all {@code len} bytes match; otherwise 1 or -1 according
 *         to the first differing byte
 */
public static int caseCmp(byte[]bytes1, int p1, byte[]bytes2, int p2, int len) {
    for (int i = 0; i < len; i++) {
        final byte left = bytes1[p1 + i];
        final byte right = bytes2[p2 + i];
        if (left != right) {
            return (left & 0xff) > (right & 0xff) ? 1 : -1;
        }
    }
    return 0;
}
/** Parses hex digits using {@code ASCIIEncoding.INSTANCE}. */
public static int scanHex(byte[]bytes, int p, int len) {
    final Encoding ascii = ASCIIEncoding.INSTANCE;
    return scanHex(bytes, p, len, ascii);
}

/**
 * Accumulates up to {@code len} leading hex digits starting at {@code p}
 * into an int, stopping at the first byte that is not a hex digit.
 *
 * @return the accumulated value (0 when no digit was read)
 */
public static int scanHex(byte[]bytes, int p, int len, Encoding enc) {
    int value = 0;
    for (int i = 0; i < len; i++) {
        final int c = bytes[p + i] & 0xff;
        if (!enc.isXDigit(c)) {
            break;
        }
        value = (value << 4) + enc.xdigitVal(c);
    }
    return value;
}

/** Counts leading hex digits using {@code ASCIIEncoding.INSTANCE}. */
public static int hexLength(byte[]bytes, int p, int len) {
    final Encoding ascii = ASCIIEncoding.INSTANCE;
    return hexLength(bytes, p, len, ascii);
}

/** Counts how many of the first {@code len} bytes at {@code p} are consecutive hex digits. */
public static int hexLength(byte[]bytes, int p, int len, Encoding enc) {
    int digits = 0;
    for (int i = 0; i < len; i++) {
        if (!enc.isXDigit(bytes[p + i] & 0xff)) {
            break;
        }
        digits++;
    }
    return digits;
}
/** Parses octal digits using {@code ASCIIEncoding.INSTANCE}. */
public static int scanOct(byte[]bytes, int p, int len) {
    final Encoding ascii = ASCIIEncoding.INSTANCE;
    return scanOct(bytes, p, len, ascii);
}

/**
 * Accumulates up to {@code len} leading octal digits starting at {@code p}
 * into an int, stopping at the first byte that is not a digit below '8'.
 *
 * @return the accumulated value (0 when no digit was read)
 */
public static int scanOct(byte[]bytes, int p, int len, Encoding enc) {
    int value = 0;
    for (int i = 0; i < len; i++) {
        final int c = bytes[p + i] & 0xff;
        if (!enc.isDigit(c) || c >= '8') {
            break;
        }
        value = (value << 3) + Encoding.digitVal(c);
    }
    return value;
}

/** Counts leading octal digits using {@code ASCIIEncoding.INSTANCE}. */
public static int octLength(byte[]bytes, int p, int len) {
    final Encoding ascii = ASCIIEncoding.INSTANCE;
    return octLength(bytes, p, len, ascii);
}

/** Counts how many of the first {@code len} bytes at {@code p} are consecutive octal digits. */
public static int octLength(byte[]bytes, int p, int len, Encoding enc) {
    int digits = 0;
    for (int i = 0; i < len; i++) {
        final int c = bytes[p + i] & 0xff;
        if (!enc.isDigit(c) || c >= '8') {
            break;
        }
        digits++;
    }
    return digits;
}
/**
 * Raises a security error when the string form of {@code value} contains
 * a zero byte.
 *
 * <p>The scan covers {@code [begin, begin + length)} of the backing array.
 * The loop bound used to be the bare length, so with a non-zero begin
 * offset the tail of the string was not examined (or nothing at all when
 * begin was at or past the length).
 *
 * @param runtime used to create the error
 * @param value   object whose {@code asString()} bytes are checked
 */
public static final void checkStringSafety(Ruby runtime, IRubyObject value) {
    final RubyString s = value.asString();
    final ByteList bl = s.getByteList();
    final byte[] array = bl.getUnsafeBytes();
    final int begin = bl.begin();
    final int end = begin + bl.length();
    for (int i = begin; i < end; ++i) {
        if (array[i] == (byte) 0) {
            throw runtime.newSecurityError("string contains null byte");
        }
    }
}
/**
 * Chooses the format string used to print character {@code c} in escaped form.
 *
 * @param c         the character value
 * @param isUnicode selects the {@code \\u} family instead of the {@code \\x} family
 * @return for unicode: "%c" for printable ASCII below 0x7F, "\\u%04X"
 *         below 0x10000, else "\\u{%X}"; for non-unicode: "\\x%02X" when the
 *         unsigned value is below 0x100, else "\\x{%X}"
 */
public static String escapedCharFormat(int c, boolean isUnicode) {
    final long unsigned = c & 0xFFFFFFFFL;
    if (!isUnicode) {
        return unsigned < 0x100 ? "\\x%02X" : "\\x{%X}";
    }
    if (unsigned < 0x7F && Encoding.isAscii(c) && ASCIIEncoding.INSTANCE.isPrint(c)) {
        return "%c";
    }
    return c < 0x10000 ? "\\u%04X" : "\\u{%X}";
}
/** True when {@code b} is below -1. */
public static boolean isIncompleteChar(int b) {
    return -1 > b;
}
/** Applies the array form to the backing bytes, begin, size and encoding of {@code val}. */
public static int bytesToFixBrokenTrailingCharacter(ByteList val, int usingLength) {
    final byte[] raw = val.getUnsafeBytes();
    final int begin = val.getBegin();
    final int size = val.getRealSize();
    final Encoding enc = val.getEncoding();
    return bytesToFixBrokenTrailingCharacter(raw, begin, size, enc, usingLength);
}

/**
 * Computes how many bytes beyond the first {@code usingLength} are needed
 * to complete the last character in that prefix.
 *
 * <p>The head of the last character is found with {@code leftAdjustCharHead};
 * the result is that character's length (from its head byte) minus the
 * bytes of it already inside the prefix.
 *
 * @return the difference described above, or 0 when {@code byteSize} is not positive
 */
public static int bytesToFixBrokenTrailingCharacter(byte[] bytes, int begin, int byteSize, Encoding encoding, int usingLength) {
    if (byteSize <= 0) {
        return 0;
    }
    final int usingEnd = begin + usingLength;
    // head of the last character, expressed relative to begin
    final int charHead = encoding.leftAdjustCharHead(bytes, begin, usingEnd - 1, usingEnd) - begin;
    final byte byteHead = (byte)(bytes[begin + charHead] & 0xFF);
    final int needed = encoding.length(byteHead);
    final int alreadyHave = usingLength - charHead;
    return needed - alreadyHave;
}
/**
 * Finds the first occurrence of a byte value in {@code ptr[start, start + len)}.
 *
 * <p>{@code find} is narrowed to a byte before comparing, as C's
 * {@code memchr} does. Previously the signed array element was compared
 * directly with the int, so values 0x80..0xFF could never match.
 *
 * @param ptr   the array to search
 * @param start first index to examine
 * @param find  the byte value to look for; only its low 8 bits are used
 * @param len   number of bytes to examine
 * @return the index of the first match, or -1 when there is none
 */
public static int memchr(byte[] ptr, int start, final int find, int len) {
    final byte target = (byte) find;
    final int end = start + len;
    for (int i = start; i < end; i++) {
        if (ptr[i] == target) {
            return i;
        }
    }
    return -1;
}
/**
 * Converts {@code ptr} to a string and rejects it when it contains an
 * embedded NUL.
 *
 * <p>For encodings with a minimum character length above one, a whole
 * zero character is searched for and the string is then passed through
 * {@code strFillTerm}; otherwise a single zero byte is searched for.
 *
 * @return the converted string (possibly the copy made by {@code strFillTerm})
 */
public static RubyString checkEmbeddedNulls(Ruby runtime, IRubyObject ptr) {
    final RubyString s = ptr.convertToString();
    final ByteList byteList = s.getByteList();
    final byte[] raw = byteList.unsafeBytes();
    final int beg = byteList.begin();
    final int len = byteList.length();
    final Encoding enc = s.getEncoding();
    final int minlen = enc.minLength();

    if (minlen <= 1) {
        if (memchr(raw, beg, '\0', len) != -1) {
            throw runtime.newArgumentError("string contains null byte");
        }
        return s;
    }

    if (strNullChar(raw, beg, len, minlen, enc) != -1) {
        throw runtime.newArgumentError("string contains null char");
    }
    return strFillTerm(s, raw, beg, len, minlen);
}
/**
 * Searches {@code sBytes[s, s + len)} character by character for one whose
 * first {@code minlen} bytes are all zero.
 *
 * <p>The scan advances with {@link #length(Encoding, byte[], int, int)}
 * rather than the raw {@code enc.length}. Other code in this class treats
 * the raw value as possibly non-positive or larger than the remaining
 * bytes, which here could move the cursor backwards or stall the loop.
 * The clamped helper returns at least {@code enc.minLength()} while that
 * many bytes remain, which the loop condition ensures when {@code minlen}
 * is the encoding's minimum length.
 *
 * @param minlen number of leading bytes that must be zero
 * @return the index of the zero character, or -1 when there is none
 */
private static int strNullChar(byte[] sBytes, int s, int len, final int minlen, Encoding enc) {
    final int e = s + len;
    while (s + minlen <= e) {
        if (zeroFilled(sBytes, s, minlen)) {
            return s;
        }
        s += length(enc, sBytes, s, e);
    }
    return -1;
}
/** True when the {@code n} bytes starting at {@code s} are all zero (vacuously true for {@code n <= 0}). */
private static boolean zeroFilled(byte[] sBytes, int s, int n) {
    for (int i = 0; i < n; i++) {
        if (sBytes[s + i] != 0) {
            return false;
        }
    }
    return true;
}
/**
 * Makes sure {@code termlen} zero bytes follow the string content.
 *
 * <p>A copy is made via {@code makeIndependent(len + termlen)} when the
 * backing array is too short after {@code beg}, or when the string is not
 * independent and the bytes after the content are not already zero. The
 * terminator bytes are then zeroed.
 *
 * @return the string that was terminated (the copy when one was made)
 */
private static RubyString strFillTerm(RubyString str, byte[] sBytes, int beg, int len, int termlen) {
    final int needed = len + termlen;
    final int capa = sBytes.length - beg;
    final boolean mustCopy = capa < needed
            || (!str.independent() && !zeroFilled(sBytes, beg + len, termlen));
    if (mustCopy) {
        str = str.makeIndependent(needed);
        final ByteList fresh = str.getByteList();
        sBytes = fresh.unsafeBytes();
        beg = fresh.begin();
    }
    TERM_FILL(sBytes, beg, len, termlen);
    return str;
}

/** Zeroes {@code termlen} bytes of {@code ptr} starting at {@code beg + len}. */
private static void TERM_FILL(byte[] ptr, final int beg, final int len, final int termlen) {
    final int from = beg + len;
    final int to = from + termlen;
    Arrays.fill(ptr, from, to, (byte) '\0');
}
/**
 * Computes the position to use after a match.
 *
 * <p>For a non-empty match this is the match end. For an empty match it is
 * the match end plus the length the encoding reports for the character
 * at {@code begin + end}, or plus one when the match end is at or past the
 * size of {@code value}.
 */
public static int positionEndForScan(ByteList value, Matcher matcher, Encoding enc, int begin, int range) {
    final int end = matcher.getEnd();
    if (matcher.getBegin() != end) {
        return end;
    }
    if (value.getRealSize() <= end) {
        return end + 1;
    }
    return end + enc.length(value.getUnsafeBytes(), begin + end, range);
}
/** Dumps {@code bytelist} in escaped form, always wrapped in double quotes. */
public static ByteList dumpCommon(Ruby runtime, ByteList bytelist) {
return dumpCommon(runtime, bytelist, false);
}
/**
 * Produces an escaped, ASCII-only rendering of {@code byteList}.
 *
 * <p>Works in two passes over the input: the first computes the output
 * size, the second writes the escaped bytes. Quote, backslash and common
 * control characters become two-char backslash escapes; {@code #} is escaped
 * only when followed by {@code $}, {@code @} or <code>{</code>; printable ASCII is
 * copied; in UTF-8, well-formed multi-byte characters become {@code \\uXXXX} or
 * <code>\\u{X...}</code>; any other byte becomes {@code \\xXX}.
 *
 * @param runtime           used for code point errors and by Sprintf
 * @param byteList          the content to dump
 * @param quoteOnlyIfNeeded when true, the surrounding double quotes are
 *                          emitted only if a non-printable byte was seen
 * @return a new ByteList holding the escaped text
 */
public static ByteList dumpCommon(Ruby runtime, ByteList byteList, boolean quoteOnlyIfNeeded) {
Encoding enc = byteList.getEncoding();
boolean includingsNonprintable = false;
int p = byteList.getBegin();
int end = p + byteList.getRealSize();
byte[]bytes = byteList.getUnsafeBytes();
// ---- pass 1: size the output; starts at 2 for the two quotes ----
int len = 2;
while (p < end) {
int c = bytes[p++] & 0xff;
switch (c) {
case '"':case '\\':case '\n':case '\r':case '\t':case '\f':
case '\013': case '\010': case '\007': case '\033':
len += 2;
break;
case '#':
len += isEVStr(bytes, p, end) ? 2 : 1;
break;
default:
if (ASCIIEncoding.INSTANCE.isPrint(c)) {
len++;
} else {
includingsNonprintable = true;
if (enc.isUTF8() && c > 0x7F) {
// n = number of bytes following the lead byte
int n = preciseLength(enc, bytes, p - 1, end) - 1;
if (MBCLEN_CHARFOUND_LEN(n) > 0) {
int cc = codePoint(runtime, enc, bytes, p - 1, end);
if (cc <= 0xFFFF) {
len += 6;
} else if (cc <= 0xFFFFF) {
len += 9;
} else {
len += 10;
}
// NOTE(review): advances by n - 1 while pass 2 advances by n, so the last
// trailing byte is visited again here and adds 4 more to len. This looks
// like it only over-estimates the buffer size; confirm before changing.
p += MBCLEN_CHARFOUND_LEN(n) - 1;
break;
}
}
// \xXX
len += 4;
}
break;
}
}
// Room for a force_encoding suffix is reserved for non-ASCII-compatible
// encodings; the suffix itself is not written in this method.
if (!enc.isAsciiCompatible()) {
len += ".force_encoding(\"".length() + enc.getName().length + "\")".length();
}
ByteList outBytes = new ByteList(len);
// `out` aliases the output's backing array; the assert at the end checks
// that the alias is still the backing array after the Sprintf calls.
byte out[] = outBytes.getUnsafeBytes();
int q = 0;
// ---- pass 2: write the escaped bytes ----
p = byteList.getBegin();
end = p + byteList.getRealSize();
if ((quoteOnlyIfNeeded && includingsNonprintable) || !quoteOnlyIfNeeded) out[q++] = '"';
while (p < end) {
int c = bytes[p++] & 0xff;
switch (c) {
case '"': case '\\':
out[q++] = '\\'; out[q++] = (byte)c; break;
case '#':
if (isEVStr(bytes, p, end)) out[q++] = '\\';
out[q++] = '#';
break;
case '\n':
out[q++] = '\\'; out[q++] = 'n'; break;
case '\r':
out[q++] = '\\'; out[q++] = 'r'; break;
case '\t':
out[q++] = '\\'; out[q++] = 't'; break;
case '\f':
out[q++] = '\\'; out[q++] = 'f'; break;
case '\013':
out[q++] = '\\'; out[q++] = 'v'; break;
case '\010':
out[q++] = '\\'; out[q++] = 'b'; break;
case '\007':
out[q++] = '\\'; out[q++] = 'a'; break;
case '\033':
out[q++] = '\\'; out[q++] = 'e'; break;
default:
if (ASCIIEncoding.INSTANCE.isPrint(c)) {
out[q++] = (byte)c;
} else {
out[q++] = '\\';
// Sync the real size to q before handing the ByteList to Sprintf.
outBytes.setRealSize(q);
if (enc.isUTF8()) {
int n = preciseLength(enc, bytes, p - 1, end) - 1;
if (MBCLEN_CHARFOUND_LEN(n) > 0) {
int cc = codePoint(runtime, enc, bytes, p - 1, end);
outBytes.setRealSize(q);
// skip the trailing bytes of the character just decoded
p += n;
if (cc <= 0xFFFF) {
Sprintf.sprintf(runtime, outBytes, "u%04X", cc);
} else {
Sprintf.sprintf(runtime, outBytes, "u{%X}", cc);
}
// pick up the write position after the formatted text
q = outBytes.getRealSize();
continue;
}
}
Sprintf.sprintf(runtime, outBytes, "x%02X", c);
q = outBytes.getRealSize();
}
}
}
if ((quoteOnlyIfNeeded && includingsNonprintable) || !quoteOnlyIfNeeded) out[q++] = '"';
outBytes.setRealSize(q);
assert out == outBytes.getUnsafeBytes();
return outBytes;
}
/** True when {@code p} is inside the range and the byte there is {@code $}, {@code @} or <code>{</code>. */
public static boolean isEVStr(byte[] bytes, int p, int end) {
    if (p >= end) {
        return false;
    }
    return isEVStr(bytes[p] & 0xff);
}

/** True when {@code c} is {@code $}, {@code @} or <code>{</code>. */
public static boolean isEVStr(int c) {
    switch (c) {
        case '$':
        case '@':
        case '{':
            return true;
        default:
            return false;
    }
}
/**
 * Counts the characters of {@code str} selected by the given tr tables.
 *
 * <p>In ASCII-compatible encodings, bytes below 0x80 are looked up
 * directly in {@code table}; all other characters are decoded and checked
 * with {@code trFind}.
 *
 * @throws IllegalArgumentException when a character cannot be decoded
 */
public static int strCount(ByteList str, boolean[] table, TrTables tables, Encoding enc) {
    final byte[] bytes = str.getUnsafeBytes();
    final int begin = str.getBegin();
    final int end = begin + str.getRealSize();
    final boolean asciiCompat = enc.isAsciiCompatible();

    int matches = 0;
    int p = begin;
    while (p < end) {
        final int unit = bytes[p] & 0xff;
        if (asciiCompat && unit < 0x80) {
            if (table[unit]) {
                matches++;
            }
            p++;
            continue;
        }
        final int c = codePoint(enc, bytes, p, end);
        final int cl = codeLength(enc, c);
        if (trFind(c, table, tables)) {
            matches++;
        }
        p += cl;
    }
    return matches;
}

/** Same count, with {@link IllegalArgumentException} converted to the runtime's argument error. */
public static int strCount(ByteList str, Ruby runtime, boolean[] table, TrTables tables, Encoding enc) {
    final int matches;
    try {
        matches = strCount(str, table, tables, enc);
    } catch (IllegalArgumentException e) {
        throw runtime.newArgumentError(e.getMessage());
    }
    return matches;
}

/** Delegates to {@link #strCount(ByteList, Ruby, boolean[], TrTables, Encoding)}. */
public static int countCommon19(ByteList str, Ruby runtime, boolean[] table, TrTables tables, Encoding enc) {
    final int matches = strCount(str, runtime, table, tables, enc);
    return matches;
}
/**
 * Finds the last occurrence of a substring at or before character index {@code pos}.
 *
 * @param source      the bytes to search in
 * @param sourceChars character length of {@code source}
 * @param subChars    character length of the substring
 * @param pos         character index to start searching backwards from
 * @return the character index of the match, or -1 when the substring is
 *         broken, longer than the source, or not found
 */
public static int rindex(ByteList source, int sourceChars, int subChars, int pos, CodeRangeable subStringCodeRangeable, Encoding enc) {
    if (subStringCodeRangeable.scanForCodeRange() == CR_BROKEN) {
        return -1;
    }

    final ByteList subString = subStringCodeRangeable.getByteList();
    final int srcLen = source.getRealSize();
    final int subLen = subString.getRealSize();
    if (sourceChars < subChars || srcLen < subLen) {
        return -1;
    }

    // the match cannot start past the last position where it still fits
    if (sourceChars - pos < subChars) {
        pos = sourceChars - subChars;
    }
    if (sourceChars == 0) {
        return pos;
    }

    final byte[] srcBytes = source.getUnsafeBytes();
    final int srcBeg = source.getBegin();
    if (pos == 0) {
        final int cmp = ByteList.memcmp(srcBytes, srcBeg, subString.getUnsafeBytes(), subString.getBegin(), subLen);
        return cmp == 0 ? 0 : -1;
    }

    final int startByte = nth(enc, srcBytes, srcBeg, srcBeg + srcLen, pos);
    return strRindex(srcBytes, srcBeg, srcLen, subString.getUnsafeBytes(), subString.getBegin(), subLen, startByte, pos, enc);
}
/**
 * Walks backwards one character at a time from byte index {@code s}
 * (character index {@code pos}), comparing the substring at each position.
 *
 * @return the character index of the first match found, or -1
 */
private static int strRindex(final byte[] strBytes, final int strBeg, final int strLen,
        final byte[] subBytes, final int subBeg, final int subLen,
        int s, int pos, final Encoding enc) {
    final int e = strBeg + strLen;
    while (s >= strBeg) {
        final boolean fits = s + subLen <= e;
        if (fits && ByteList.memcmp(strBytes, s, subBytes, subBeg, subLen) == 0) {
            return pos;
        }
        if (pos == 0) {
            return -1;
        }
        pos--;
        s = enc.prevCharHead(strBytes, strBeg, s, e);
    }
    return -1;
}
/** Character length of {@code string} under {@code enc}; the byte size when single-byte optimizable. */
public static int strLengthFromRubyString(CodeRangeable string, Encoding enc) {
    final ByteList bytes = string.getByteList();
    if (isSingleByteOptimizable(string, enc)) {
        return bytes.getRealSize();
    }
    return strLengthFromRubyStringFull(string, bytes, enc);
}

/** Character length of {@code string} under the encoding of its own byte list. */
public static int strLengthFromRubyString(CodeRangeable string) {
    final ByteList bytes = string.getByteList();
    final Encoding enc = bytes.getEncoding();
    if (isSingleByteOptimizable(string, enc)) {
        return bytes.getRealSize();
    }
    return strLengthFromRubyStringFull(string, bytes, enc);
}

/**
 * Character length computed from an explicitly supplied byte list. This
 * variant does not store a code range back into {@code string}.
 */
public static int strLengthFromRubyString(CodeRangeable string, final ByteList bytes, final Encoding enc) {
    if (isSingleByteOptimizable(string, enc)) {
        return bytes.getRealSize();
    }
    if (string.isCodeRangeValid() && enc.isUTF8()) {
        return utf8Length(bytes);
    }
    return unpackResult(strLengthWithCodeRange(bytes, enc));
}

/**
 * Full length computation: uses {@code utf8Length} for valid UTF-8,
 * otherwise scans for length and code range together and stores a
 * non-zero code range back into {@code string}.
 */
private static int strLengthFromRubyStringFull(CodeRangeable string, ByteList bytes, Encoding enc) {
    if (string.isCodeRangeValid() && enc.isUTF8()) {
        return utf8Length(bytes);
    }
    final long packed = strLengthWithCodeRange(bytes, enc);
    final int cr = unpackArg(packed);
    if (cr != 0) {
        string.setCodeRange(cr);
    }
    return unpackResult(packed);
}
/**
 * Cursor state used while reading a tr-style character specification:
 * the backing bytes, the current and end positions, and the state of a
 * range that is being expanded.
 */
public static final class TR {
    /** Backing bytes of the specification. */
    final byte[] buf;
    /** Current read position and end position within {@link #buf}. */
    int p, pend;
    /** Current code point and upper bound of the range being expanded. */
    int now, max;
    /** True while a range is being expanded. */
    boolean gen;

    /** Starts at the beginning of the live region of {@code bytes} with no range in progress. */
    public TR(ByteList bytes) {
        this.buf = bytes.getUnsafeBytes();
        this.p = bytes.getBegin();
        this.pend = this.p + bytes.getRealSize();
        this.now = 0;
        this.max = 0;
        this.gen = false;
    }
}
/**
 * Holder for the two code-point sets built by {@code trSetupTable} for
 * code points at or above {@code TRANS_SIZE}; either may be {@code null}.
 * {@code trFind} reads both.
 */
public static final class TrTables {
IntHashMap<Object> del, noDel;
}
/** Placeholder value stored in the tr tables to mark a key as present. */
private static final Object DUMMY_VALUE = "";
/**
 * Folds one tr-style character specification into the lookup tables.
 *
 * <p>Code points below {@code TRANS_SIZE} are tracked in {@code stable};
 * larger ones in the hash tables of {@code tables}. A leading {@code ^}
 * (when the spec is longer than one byte) negates the specification.
 *
 * @param str    the specification
 * @param stable boolean table that is AND-ed with this specification; index
 *               {@code TRANS_SIZE} is also written, so it needs at least
 *               {@code TRANS_SIZE + 1} entries
 * @param tables existing hash tables, or {@code null} to create new ones
 * @param first  true for the first specification: resets {@code stable}
 * @param enc    encoding used to read {@code str}
 * @return {@code tables}, or the newly created instance
 */
public static TrTables trSetupTable(final ByteList str,
final boolean[] stable, TrTables tables, final boolean first, final Encoding enc) {
int i, l[] = {0};
final boolean cflag;
final TR tr = new TR(str);
// A leading '^' negates the set and is consumed.
if (str.realSize() > 1 && EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, l, enc) == '^') {
cflag = true;
tr.p += l[0];
}
else {
cflag = false;
}
if (first) {
// First spec: start from "everything allowed"; the extra slot records negation.
for ( i = 0; i < TRANS_SIZE; i++) stable[i] = true;
stable[TRANS_SIZE] = cflag;
}
else if (stable[TRANS_SIZE] && !cflag) {
stable[TRANS_SIZE] = false;
}
if (tables == null) tables = new TrTables();
// buf is this spec's own membership table for code points below TRANS_SIZE;
// allocated lazily on the first such code point.
byte[] buf = null;
IntHashMap<Object> table = null, ptable = null;
int c;
while ((c = trNext(tr, enc)) != -1) {
if (c < TRANS_SIZE) {
if ( buf == null ) {
buf = new byte[TRANS_SIZE];
for ( i = 0; i < TRANS_SIZE; i++ ) {
buf[i] = (byte) (cflag ? 1 : 0);
}
}
buf[c & 0xff] = (byte) (cflag ? 0 : 1);
}
else {
// Large code point: pick the hash table to fill, once per call.
if ( table == null && (first || tables.del != null || stable[TRANS_SIZE]) ) {
if ( cflag ) {
// negated spec: reuse the existing noDel table or create it
ptable = tables.noDel;
table = ptable != null ? ptable : new IntHashMap<>(8);
tables.noDel = table;
}
else {
// plain spec: a fresh del table replaces the previous one (kept in ptable)
table = new IntHashMap<>(8);
ptable = tables.del;
tables.del = table;
}
}
if ( table != null ) {
final int key = c;
if ( ptable == null ) table.put(key, DUMMY_VALUE);
else {
if ( cflag ) table.put(key, DUMMY_VALUE);
else {
// keep the key only if the previous del table also had it
final boolean val = ptable.get(key) != null;
table.put(key, val ? DUMMY_VALUE : null);
}
}
}
}
}
// AND this spec's membership into stable.
if ( buf != null ) {
for ( i = 0; i < TRANS_SIZE; i++ ) {
stable[i] = stable[i] && buf[i] != 0;
}
}
else {
// No small code point in this spec: entries survive only if it was negated.
for ( i = 0; i < TRANS_SIZE; i++ ) {
stable[i] = stable[i] && cflag;
}
}
if ( table == null && ! cflag ) tables.del = null;
return tables;
}
/**
 * Same as {@link #trSetupTable(ByteList, boolean[], TrTables, boolean, Encoding)}
 * with {@link IllegalArgumentException} converted to the runtime's
 * argument error, keeping the message.
 */
public static TrTables trSetupTable(final ByteList str, final Ruby runtime,
        final boolean[] stable, TrTables tables, final boolean first, final Encoding enc) {
    final TrTables result;
    try {
        result = trSetupTable(str, stable, tables, first, enc);
    } catch (IllegalArgumentException e) {
        throw runtime.newArgumentError(e.getMessage());
    }
    return result;
}
/**
 * Looks up code point {@code c} in tables built by {@code trSetupTable}.
 *
 * <p>Code points below {@code TRANS_SIZE} are read from {@code table}.
 * Larger ones are checked against the {@code del} / {@code noDel} sets;
 * when neither decides, the value in slot {@code TRANS_SIZE} is returned.
 */
public static boolean trFind(final int c, final boolean[] table, final TrTables tables) {
    if (c < TRANS_SIZE) {
        return table[c];
    }

    final IntHashMap<Object> del = tables.del;
    final IntHashMap<Object> noDel = tables.noDel;
    if (del == null) {
        if (noDel != null && noDel.get(c) != null) {
            return false;
        }
    } else if (del.get(c) != null && (noDel == null || noDel.get(c) == null)) {
        return true;
    }
    return table[TRANS_SIZE];
}
public static int trNext(final TR tr, Encoding enc) {
for (;;) {
if ( ! tr.gen ) {
return trNext_nextpart(tr, enc);
}
while (enc.codeToMbcLength( ++tr.now ) <= 0) {
if (tr.now == tr.max) {
tr.gen = false;
return trNext_nextpart(tr, enc);
}
}
if (tr.now < tr.max) {
return tr.now;
} else {
tr.gen = false;
return tr.max;
}
}
}
/**
 * Runtime-aware variant of {@code trNext}: an {@link IllegalArgumentException} raised while
 * parsing the specification is rethrown as a Ruby ArgumentError with the same message.
 */
public static int trNext(final TR tr, Ruby runtime, Encoding enc) {
    final int next;
    try {
        next = trNext(tr, enc);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return next;
}
/**
 * Parses the next element of a tr specification: a single (optionally backslash-escaped)
 * character, or the start of a "lo-hi" range. For a range, {@code tr.gen} is switched on
 * and {@code tr.max} records the upper bound so later calls enumerate it.
 *
 * @return the code point read, or -1 when the specification has been fully consumed
 * @throws IllegalArgumentException if a range's lower bound is greater than its upper bound
 */
private static int trNext_nextpart(final TR tr, Encoding enc) {
    final int[] width = {0};
    if (tr.p == tr.pend) return -1;

    // a backslash escapes the following character, unless it is the last character
    final boolean escaped =
            EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, width, enc) == '\\' && tr.p + width[0] < tr.pend;
    if (escaped) {
        tr.p += width[0];
    }

    tr.now = EncodingUtils.encCodepointLength(tr.buf, tr.p, tr.pend, width, enc);
    tr.p += width[0];

    // a '-' that is not the final character introduces a range
    final boolean rangeFollows =
            EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, width, enc) == '-' && tr.p + width[0] < tr.pend;
    if (!rangeFollows) {
        return tr.now;
    }
    tr.p += width[0];
    if (tr.p >= tr.pend) {
        return tr.now;
    }

    final int upper = EncodingUtils.encCodepointLength(tr.buf, tr.p, tr.pend, width, enc);
    tr.p += width[0];
    if (tr.now > upper) {
        if (tr.now < 0x80 && upper < 0x80) {
            throw new IllegalArgumentException("invalid range \""
                    + (char) tr.now + '-' + (char) upper + "\" in string transliteration");
        }
        throw new IllegalArgumentException("invalid range in string transliteration");
    }
    tr.gen = true;
    tr.max = upper;
    return tr.now;
}
/** Outcome of stepping a character to its successor or predecessor in place. */
public static enum NeighborChar {
    /** The bytes did not form, or could not be turned into, a valid character. */
    NOT_CHAR,
    /** A neighbouring character of the same byte length was written in place. */
    FOUND,
    /** The step ran past the end of the range and wrapped around. */
    WRAPPED
}
/**
 * Computes the successor of a string's bytes and returns it as a new ByteList;
 * {@code original} itself is copied, not modified.
 *
 * First pass: walk characters from the end, incrementing the rightmost alphanumeric
 * character; on wrap-around continue with the next alphanumeric to the left.
 * Second pass (only when no alphanumeric was seen): increment the rightmost character of
 * any kind. If every candidate wrapped, a carry character is inserted at the recorded position.
 *
 * @param original bytes to compute the successor of
 * @return a copy of {@code original} holding the successor value
 */
public static ByteList succCommon(ByteList original) {
// carry holds the character to insert if everything wraps; defaults to the single byte 0x01
byte carry[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
int carryP = 0;
carry[0] = 1;
int carryLen = 1;
ByteList valueCopy = new ByteList(original);
valueCopy.setEncoding(original.getEncoding());
Encoding enc = original.getEncoding();
int p = valueCopy.getBegin();
int end = p + valueCopy.getRealSize();
int s = end;
byte[]bytes = valueCopy.getUnsafeBytes();
NeighborChar neighbor = NeighborChar.FOUND;
int lastAlnum = -1;
boolean alnumSeen = false;
// pass 1: scan backwards over character heads, incrementing alphanumerics
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
if (neighbor == NeighborChar.NOT_CHAR && lastAlnum != -1) {
// a non-alnum separated us from the last wrapped alnum; stop carrying when the
// character class switches (letter <-> digit) across that separator
ASCIIEncoding ascii = ASCIIEncoding.INSTANCE;
if (ascii.isAlpha(bytes[lastAlnum] & 0xff) ?
ascii.isDigit(bytes[s] & 0xff) :
ascii.isDigit(bytes[lastAlnum] & 0xff) ?
ascii.isAlpha(bytes[s] & 0xff) : false) {
s = lastAlnum;
break;
}
}
int cl = preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
switch (neighbor = succAlnumChar(enc, bytes, s, cl, carry, 0)) {
case NOT_CHAR: continue;
case FOUND: return valueCopy;
// WRAPPED deliberately falls out of the switch to record the carry position below
case WRAPPED: lastAlnum = s;
}
alnumSeen = true;
carryP = s - p;
carryLen = cl;
}
// pass 2: no alphanumeric at all, so increment the rightmost character of any kind
if (!alnumSeen) {
s = end;
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
int cl = preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
neighbor = succChar(enc, bytes, s, cl);
if (neighbor == NeighborChar.FOUND) return valueCopy;
// the wrapped bytes are no longer a character of length cl: step once more
if (preciseLength(enc, bytes, s, s + 1) != cl) succChar(enc, bytes, s, cl);
if (!enc.isAsciiCompatible()) {
System.arraycopy(bytes, s, carry, 0, cl);
carryLen = cl;
}
carryP = s - p;
}
}
// everything wrapped: make room and insert the carry character at carryP
valueCopy.ensure(valueCopy.getBegin() + valueCopy.getRealSize() + carryLen);
s = valueCopy.getBegin() + carryP;
System.arraycopy(valueCopy.getUnsafeBytes(), s, valueCopy.getUnsafeBytes(), s + carryLen, valueCopy.getRealSize() - carryP);
System.arraycopy(carry, 0, valueCopy.getUnsafeBytes(), s, carryLen);
valueCopy.setRealSize(valueCopy.getRealSize() + carryLen);
return valueCopy;
}
/**
 * Runtime-aware variant of {@code succCommon}: an {@link IllegalArgumentException} from the
 * runtime-free overload is rethrown as a Ruby ArgumentError with the same message.
 */
public static ByteList succCommon(Ruby runtime, ByteList original) {
    final ByteList successor;
    try {
        successor = succCommon(original);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return successor;
}
/**
 * Replaces the character stored in {@code bytes[p, p + len)} with its successor, in place.
 *
 * @param enc   encoding of the character
 * @param bytes buffer holding the character; modified
 * @param p     index of the character's first byte
 * @param len   byte length of the character
 * @return FOUND when a valid character of the same length was written, WRAPPED when the
 *         successor does not fit in {@code len} bytes, NOT_CHAR when the bytes are not
 *         (or do not become) a valid character
 */
public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len) {
int l;
// wide-unit encodings (minLength > 1): work on the code point rather than raw bytes
if (enc.minLength() > 1) {
int r = preciseLength(enc, bytes, p, p + len), c;
if (!MBCLEN_CHARFOUND_P(r)) {
return NeighborChar.NOT_CHAR;
}
c = codePoint(enc, bytes, p, p + len) + 1;
l = codeLength(enc, c);
if (l == 0) return NeighborChar.NOT_CHAR;
if (l != len) return NeighborChar.WRAPPED;
EncodingUtils.encMbcput(c, bytes, p, enc);
r = preciseLength(enc, bytes, p, p + len);
if (!MBCLEN_CHARFOUND_P(r)) {
return NeighborChar.NOT_CHAR;
}
return NeighborChar.FOUND;
}
// byte-wise increment with carry, repeated until the bytes form a valid character
while (true) {
int i = len - 1;
// trailing 0xff bytes roll over to 0 and carry to the left
for (; i >= 0 && bytes[p + i] == (byte)0xff; i--) bytes[p + i] = 0;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) + 1);
l = preciseLength(enc, bytes, p, p + len);
if (MBCLEN_CHARFOUND_P(l)) {
l = MBCLEN_CHARFOUND_LEN(l);
if (l == len) {
return NeighborChar.FOUND;
} else {
// a shorter character was found: saturate the remaining bytes so the next
// iteration carries past them
int start = p + l;
int end = start + (len - l);
Arrays.fill(bytes, start, end, (byte) 0xff);
}
}
if (MBCLEN_INVALID_P(l) && i < len - 1) {
// invalid sequence: find the longest prefix that is not invalid and saturate
// the bytes after it to skip the invalid range quickly
int len2;
int l2;
for (len2 = len-1; 0 < len2; len2--) {
l2 = preciseLength(enc, bytes, p, p + len2);
if (!MBCLEN_INVALID_P(l2))
break;
}
int start = p+len2+1;
int end = start + len-(len2+1);
Arrays.fill(bytes, start, end, (byte)0xff);
}
}
}
/**
 * Runtime-aware variant of {@code succChar}: an {@link IllegalArgumentException} from the
 * runtime-free overload is rethrown as a Ruby ArgumentError with the same message.
 */
public static NeighborChar succChar(Ruby runtime, Encoding enc, byte[] bytes, int p, int len) {
    final NeighborChar outcome;
    try {
        outcome = succChar(enc, bytes, p, len);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return outcome;
}
/**
 * Steps the alphanumeric character at {@code bytes[p, p + len)} to its successor within the
 * same class (digit or letter). If the successor leaves the class, the character is rewound
 * to the first member of its contiguous run and a carry character is stored in {@code carry}.
 *
 * @return NOT_CHAR if the character is neither digit nor letter (or has no run to rewind
 *         over), FOUND if the successor stayed in the class, WRAPPED otherwise
 */
private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) {
    final byte[] backup = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
    int code = enc.mbcToCode(bytes, p, p + len);

    final int cType;
    if (enc.isDigit(code)) {
        cType = CharacterType.DIGIT;
    } else if (enc.isAlpha(code)) {
        cType = CharacterType.ALPHA;
    } else {
        return NeighborChar.NOT_CHAR;
    }

    // try the plain successor first; accept it only if it stays in the same class
    System.arraycopy(bytes, p, backup, 0, len);
    if (succChar(enc, bytes, p, len) == NeighborChar.FOUND) {
        code = enc.mbcToCode(bytes, p, p + len);
        if (enc.isCodeCType(code, cType)) return NeighborChar.FOUND;
    }
    System.arraycopy(backup, 0, bytes, p, len);

    // walk backwards to the first character of this class's contiguous run
    int range = 1;
    for (;; range++) {
        System.arraycopy(bytes, p, backup, 0, len);
        final boolean stillInClass = predChar(enc, bytes, p, len) == NeighborChar.FOUND
                && enc.isCodeCType(enc.mbcToCode(bytes, p, p + len), cType);
        if (!stillInClass) {
            // went one step too far: undo it and stop
            System.arraycopy(backup, 0, bytes, p, len);
            break;
        }
    }
    if (range == 1) return NeighborChar.NOT_CHAR;

    // the carry is the run's first character; for digits it is bumped once more
    System.arraycopy(bytes, p, carry, carryP, len);
    if (cType == CharacterType.DIGIT) {
        succChar(enc, carry, carryP, len);
    }
    return NeighborChar.WRAPPED;
}
/**
 * Replaces the character stored in {@code bytes[p, p + len)} with its predecessor, in place.
 * Mirror image of {@code succChar}: decrements instead of incrementing.
 *
 * @return FOUND when a valid character of the same length was written, WRAPPED when the
 *         predecessor does not fit in {@code len} bytes, NOT_CHAR when the bytes are not
 *         (or do not become) a valid character or the code point is already 0
 */
private static NeighborChar predChar(Encoding enc, byte[]bytes, int p, int len) {
int l;
// wide-unit encodings (minLength > 1): work on the code point rather than raw bytes
if (enc.minLength() > 1) {
int r = preciseLength(enc, bytes, p, p + len), c;
if (!MBCLEN_CHARFOUND_P(r)) {
return NeighborChar.NOT_CHAR;
}
c = codePoint(enc, bytes, p, p + len);
if (c == 0) return NeighborChar.NOT_CHAR;
--c;
l = codeLength(enc, c);
if (l == 0) return NeighborChar.NOT_CHAR;
if (l != len) return NeighborChar.WRAPPED;
EncodingUtils.encMbcput(c, bytes, p, enc);
r = preciseLength(enc, bytes, p, p + len);
if (!MBCLEN_CHARFOUND_P(r)) {
return NeighborChar.NOT_CHAR;
}
return NeighborChar.FOUND;
}
// byte-wise decrement with borrow, repeated until the bytes form a valid character
while (true) {
int i = len - 1;
// trailing 0x00 bytes roll under to 0xff and borrow from the left
for (; i >= 0 && bytes[p + i] == 0; i--) bytes[p + i] = (byte)0xff;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) - 1);
l = preciseLength(enc, bytes, p, p + len);
if (MBCLEN_CHARFOUND_P(l)) {
l = MBCLEN_CHARFOUND_LEN(l);
if (l == len) {
return NeighborChar.FOUND;
} else {
// a shorter character was found: zero the remaining bytes so the next
// iteration borrows past them
int start = p + l;
int end = start + (len - l);
Arrays.fill(bytes, start, end, (byte) 0x0);
}
}
if (!MBCLEN_CHARFOUND_P(l) && i < len-1) {
// no character found: locate the longest prefix that is not invalid and zero
// the bytes after it
int len2;
int l2;
for (len2 = len-1; 0 < len2; len2--) {
l2 = preciseLength(enc, bytes, p, p + len2);
if (!MBCLEN_INVALID_P(l2))
break;
}
int start = p + len2 + 1;
int end = start + (len - (len2 + 1));
Arrays.fill(bytes, start, end, (byte) 0);
}
}
}
/**
 * Reports whether byte offsets and character offsets coincide for {@code string}: true when
 * its code range is 7-bit or the encoding's maximum character length is one byte.
 */
public static boolean isSingleByteOptimizable(CodeRangeable string, Encoding encoding) {
    if (string.getCodeRange() == CR_7BIT) {
        return true;
    }
    return encoding.maxLength() == 1;
}
/**
 * Finds the first occurrence of {@code other} in {@code source} at or after byte
 * {@code offset} that starts on a character boundary of {@code enc}.
 *
 * @param source bytes to search in
 * @param other  bytes to search for
 * @param offset byte offset, relative to the start of {@code source}, to begin searching at
 * @param enc    encoding used to validate that a hit starts on a character head
 * @return the byte offset of the match relative to the start of {@code source},
 *         {@code offset} itself when {@code other} is empty, or -1 when there is no match
 */
public static int index(ByteList source, ByteList other, int offset, Encoding enc) {
    int sourceLen = source.realSize();
    int sourceBegin = source.begin();
    int otherLen = other.realSize();
    if (otherLen == 0) return offset;
    if (sourceLen - offset < otherLen) return -1;
    byte[] sourceBytes = source.getUnsafeBytes();
    int p = sourceBegin + offset;
    // End of the valid data. This must not include the offset: otherwise the encoding's
    // character-head adjustment is allowed to look past the real end of the string.
    int end = sourceBegin + sourceLen;
    while (true) {
        int pos = source.indexOf(other, p - sourceBegin);
        if (pos < 0) return pos;
        pos -= (p - sourceBegin);
        // reject hits that begin in the middle of a multibyte character
        int t = enc.rightAdjustCharHead(sourceBytes, p, p + pos, end);
        if (t == p + pos) return pos + offset;
        if ((sourceLen -= t - p) <= 0) return -1;
        offset += t - p;
        p = t;
    }
}
/**
 * Finds {@code otherString} inside {@code sourceString}, starting at character
 * {@code offset} (negative values count from the end), accepting only hits that begin on a
 * character boundary of {@code enc}.
 *
 * @return the position of the match, or -1 when there is none or the needle is broken
 */
public static int index(CodeRangeable sourceString, CodeRangeable otherString, int offset, Encoding enc) {
    if (otherString.scanForCodeRange() == CR_BROKEN) return -1;

    int remaining = strLengthFromRubyString(sourceString);
    final int needleLength = strLengthFromRubyString(otherString);
    if (offset < 0) {
        offset += remaining;
        if (offset < 0) return -1;
    }

    final ByteList haystack = sourceString.getByteList();
    final ByteList needle = otherString.getByteList();
    if (remaining - offset < needleLength) return -1;

    final byte[] bytes = haystack.getUnsafeBytes();
    final int begin = haystack.getBegin();
    final int end = begin + haystack.getRealSize();
    int cursor = begin;
    if (offset != 0) {
        // convert the character offset to a byte offset unless they are known to coincide
        if (!isSingleByteOptimizable(sourceString, enc)) {
            offset = offset(enc, bytes, cursor, end, offset);
        }
        cursor += offset;
    }
    if (needleLength == 0) return offset;

    for (;;) {
        int pos = haystack.indexOf(needle, cursor - begin);
        if (pos < 0) return pos;
        pos -= cursor - begin;
        // a hit in the middle of a multibyte character does not count; resume after it
        final int head = enc.rightAdjustCharHead(bytes, cursor, cursor + pos, end);
        if (head == cursor + pos) return pos + offset;
        remaining -= head - cursor;
        if (remaining <= 0) return -1;
        offset += head - cursor;
        cursor = head;
    }
}
/**
 * Switches {@code string} to encoding {@code enc}. The cached code range is cleared unless
 * the string is ASCII-only and the new encoding is ASCII-compatible. Does nothing when the
 * string already has that encoding.
 */
public static void associateEncoding(CodeRangeable string, Encoding enc) {
    final ByteList bytes = string.getByteList();
    if (bytes.getEncoding() == enc) {
        return;
    }
    final boolean rangeSurvives = CodeRangeSupport.isCodeRangeAsciiOnly(string) && enc.isAsciiCompatible();
    if (!rangeSurvives) {
        string.clearCodeRange();
    }
    bytes.setEncoding(enc);
}
/**
 * Replaces {@code len} bytes of {@code source}, starting at byte {@code beg}, with the bytes
 * of {@code repl}. {@code len} is clamped so the replaced span ends at the end of the source.
 *
 * @return the byte list of {@code source} after the replacement
 */
public static ByteList replaceInternal(int beg, int len, ByteListHolder source, CodeRangeable repl) {
    // snapshot the pre-modification storage: modify() may swap the backing array
    final ByteList before = source.getByteList();
    final int oldLength = before.getRealSize();
    if (beg + len >= oldLength) len = oldLength - beg;

    final ByteList replacement = repl.getByteList();
    final int replLength = replacement.getRealSize();
    final int newLength = oldLength + replLength - len;
    final byte[] oldBytes = before.getUnsafeBytes();
    final int oldBegin = before.getBegin();

    source.modify(newLength);
    final ByteList target = source.getByteList();
    final int tailStart = beg + len;

    if (replLength != len) {
        // move the tail so there is exactly replLength bytes of room at beg
        System.arraycopy(oldBytes, oldBegin + tailStart, target.getUnsafeBytes(), beg + replLength, oldLength - tailStart);
    }
    if (replLength > 0) {
        System.arraycopy(replacement.getUnsafeBytes(), replacement.getBegin(), target.getUnsafeBytes(), beg, replLength);
    }
    target.setRealSize(newLength);
    return target;
}
/**
 * Character-based replace: converts the character range ({@code beg}, {@code len}) into a
 * byte range, delegates to {@code replaceInternal}, then updates the encoding and code range
 * of {@code source}.
 */
public static void replaceInternal19(int beg, int len, CodeRangeable source, CodeRangeable repl) {
    final Encoding enc = source.checkEncoding(repl);
    source.modify();
    source.keepCodeRange();

    final ByteList holder = source.getByteList();
    final byte[] raw = holder.unsafeBytes();
    final int first = holder.begin();
    final int limit = first + holder.realSize();
    final boolean singlebyte = isSingleByteOptimizable(source, source.getByteList().getEncoding());

    // locate the byte positions of the first and one-past-last replaced characters
    int from = nth(enc, raw, first, limit, beg, singlebyte);
    if (from == -1) from = limit;
    int to = nth(enc, raw, from, limit, len, singlebyte);
    if (to == -1) to = limit;

    replaceInternal(from - first, to - from, source, repl);
    associateEncoding(source, enc);

    final int merged = CodeRangeSupport.codeRangeAnd(source.getCodeRange(), repl.getCodeRange());
    if (merged != CR_BROKEN) {
        source.setCodeRange(merged);
    }
}
/**
 * Validating front end for the character-based replace. Normalises a negative {@code beg},
 * clamps {@code len} to the string length, performs the replacement and propagates taint.
 * Raises a Ruby IndexError for a negative length or an index outside the string.
 */
public static void replaceInternal19(Ruby runtime, int beg, int len, RubyString source, RubyString repl) {
    if (len < 0) {
        throw runtime.newIndexError("negative length " + len);
    }

    final int charLength = strLengthFromRubyString(source, source.checkEncoding(repl));
    if (charLength < beg) {
        throw runtime.newIndexError("index " + beg + " out of string");
    }
    if (beg < 0) {
        if (-beg > charLength) {
            throw runtime.newIndexError("index " + beg + " out of string");
        }
        beg += charLength;
    }

    final boolean overrunsEnd = charLength < len || charLength < beg + len;
    if (overrunsEnd) {
        len = charLength - beg;
    }

    replaceInternal19(beg, len, source, repl);
    if (repl.isTaint()) {
        source.setTaint(true);
    }
}
/**
 * Reports whether {@code string} is ASCII-only: its encoding is ASCII-compatible and a
 * code-range scan yields 7-bit.
 */
public static boolean isAsciiOnly(CodeRangeable string) {
    final Encoding encoding = string.getByteList().getEncoding();
    final int codeRange = string.scanForCodeRange();
    return isAsciiOnly(encoding, codeRange);
}
/** True when the encoding is ASCII-compatible and the given code range is 7-bit. */
private static boolean isAsciiOnly(Encoding encoding, final int codeRange) {
    if (!encoding.isAsciiCompatible()) {
        return false;
    }
    return codeRange == CR_7BIT;
}
/**
 * Removes, in place, every character of {@code rubyString} selected by the tr tables.
 *
 * @param squeeze flags for small code points (indexed directly for ASCII bytes)
 * @param tables  hash tables consulted through {@code trFind} for other characters
 * @return {@code rubyString} if at least one character was removed, otherwise null
 */
static CodeRangeable strDeleteBang(CodeRangeable rubyString, boolean[] squeeze, TrTables tables, Encoding enc) {
    rubyString.modify();
    rubyString.keepCodeRange();

    final ByteList value = rubyString.getByteList();
    int read = value.getBegin();
    int write = read;
    final int limit = read + value.getRealSize();
    final byte[] bytes = value.getUnsafeBytes();
    final boolean asciiCompatible = enc.isAsciiCompatible();

    boolean removedAny = false;
    int cr = asciiCompatible ? CR_7BIT : CR_VALID;

    while (read < limit) {
        final int lead = bytes[read] & 0xff;
        if (asciiCompatible && Encoding.isAscii(lead)) {
            // single ASCII byte: look it up directly
            if (squeeze[lead]) {
                removedAny = true;
            } else {
                if (write != read) bytes[write] = (byte) lead;
                write++;
            }
            read++;
            continue;
        }

        final int c = codePoint(enc, bytes, read, limit);
        final int cl = codeLength(enc, c);
        if (trFind(c, squeeze, tables)) {
            removedAny = true;
        } else {
            if (write != read) enc.codeToMbc(c, bytes, write);
            write += cl;
            // a surviving non-ASCII character means the result is no longer 7-bit
            if (cr == CR_7BIT) cr = CR_VALID;
        }
        read += cl;
    }

    value.setRealSize(write - value.getBegin());
    rubyString.setCodeRange(cr);
    return removedAny ? rubyString : null;
}
/**
 * Runtime-aware variant of {@code strDeleteBang}: an {@link IllegalArgumentException} from
 * the runtime-free overload is rethrown as a Ruby ArgumentError with the same message.
 */
public static CodeRangeable strDeleteBang(CodeRangeable rubyString, Ruby runtime, boolean[] squeeze, TrTables tables, Encoding enc) {
    final CodeRangeable outcome;
    try {
        outcome = strDeleteBang(rubyString, squeeze, tables, enc);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return outcome;
}
/**
 * Computes the byte length {@code str} would have after removing its last character,
 * treating a trailing "\r\n" pair as a single character.
 *
 * @param str string to measure
 * @return the chopped byte length; 0 for an empty string
 */
public static int choppedLength(CodeRangeable str) {
    ByteList bl = str.getByteList();
    Encoding enc = bl.getEncoding();
    int p, p2, beg, end;
    beg = bl.begin();
    end = beg + bl.realSize();
    // An empty string has nothing to chop. (The previous "beg > end" test let the empty
    // case through, producing a negative length below.)
    if (beg >= end) return 0;
    p = enc.prevCharHead(bl.unsafeBytes(), beg, end, end);
    // prevCharHead signals "no previous character" with -1, not 0
    if (p == -1) return 0;
    if (p > beg && EncodingUtils.encAscget(bl.unsafeBytes(), p, end, null, enc) == '\n') {
        // last character is "\n": also drop a directly preceding "\r"
        p2 = enc.prevCharHead(bl.unsafeBytes(), beg, p, end);
        if (p2 != -1 && EncodingUtils.encAscget(bl.unsafeBytes(), p2, end, null, enc) == '\r') p = p2;
    }
    return p - beg;
}
/**
 * Legacy alias kept for compatibility; the {@code runtime} argument is ignored.
 *
 * @deprecated use {@link #choppedLength(CodeRangeable)} instead
 */
@Deprecated
public static int choppedLength19(CodeRangeable str, Ruby runtime) {
    final int chopped = choppedLength(str);
    return chopped;
}
/**
 * Determines the encoding two strings can be combined under.
 *
 * @return the compatible encoding, or null when the strings cannot be combined
 */
public static Encoding areCompatible(CodeRangeable str1, CodeRangeable str2) {
    final ByteList bytes1 = str1.getByteList();
    final ByteList bytes2 = str2.getByteList();
    final Encoding enc1 = bytes1.getEncoding();
    final Encoding enc2 = bytes2.getEncoding();

    if (enc1 == enc2) return enc1;

    // an empty operand never forces an encoding on the other one
    if (bytes2.getRealSize() == 0) return enc1;
    if (bytes1.getRealSize() == 0) {
        final boolean keepFirst = enc1.isAsciiCompatible() && isAsciiOnly(str2);
        return keepFirst ? enc1 : enc2;
    }

    final boolean bothAsciiCompatible = enc1.isAsciiCompatible() && enc2.isAsciiCompatible();
    if (!bothAsciiCompatible) return null;

    return RubyEncoding.areCompatible(enc1, str1.scanForCodeRange(), enc2, str2.scanForCodeRange());
}
/**
 * ByteList flavour of the compatibility check; code ranges are obtained by scanning the
 * bytes on demand.
 *
 * @return the compatible encoding, or null when the byte lists cannot be combined
 */
public static Encoding areCompatible(ByteList str1, ByteList str2) {
    final Encoding enc1 = str1.getEncoding();
    final Encoding enc2 = str2.getEncoding();

    if (enc1 == enc2) return enc1;

    // an empty operand never forces an encoding on the other one
    if (str2.getRealSize() == 0) return enc1;
    if (str1.getRealSize() == 0) {
        final boolean keepFirst = enc1.isAsciiCompatible() && isAsciiOnly(enc2, scanForCodeRange(str2));
        return keepFirst ? enc1 : enc2;
    }

    final boolean bothAsciiCompatible = enc1.isAsciiCompatible() && enc2.isAsciiCompatible();
    if (!bothAsciiCompatible) return null;

    return RubyEncoding.areCompatible(enc1, scanForCodeRange(str1), enc2, scanForCodeRange(str2));
}
/**
 * Concatenates two byte lists into a freshly allocated one: the bytes of {@code value1}
 * followed by the bytes of {@code value2}.
 */
public static ByteList addByteLists(ByteList value1, ByteList value2) {
    final int total = value1.getRealSize() + value2.getRealSize();
    final ByteList joined = new ByteList(total);
    joined.setRealSize(total);

    final int firstLength = value1.getRealSize();
    System.arraycopy(value1.getUnsafeBytes(), value1.getBegin(), joined.getUnsafeBytes(), 0, firstLength);
    System.arraycopy(value2.getUnsafeBytes(), value2.getBegin(), joined.getUnsafeBytes(), firstLength, value2.getRealSize());
    return joined;
}
/**
 * Reports whether two strings may be compared byte-wise: always when they share an encoding
 * or either one is empty, otherwise as decided by their code ranges.
 */
public static boolean areComparable(CodeRangeable string, CodeRangeable other) {
    final ByteList otherBytes = other.getByteList();

    if (string.getByteList().getEncoding() == otherBytes.getEncoding()) return true;
    if (string.getByteList().getRealSize() == 0) return true;
    if (otherBytes.getRealSize() == 0) return true;

    return areComparableViaCodeRange(string, other);
}
/**
 * Code-range based comparability test: a 7-bit string is comparable with another 7-bit
 * string or with any string whose encoding is ASCII-compatible.
 */
public static boolean areComparableViaCodeRange(CodeRangeable string, CodeRangeable other) {
    final int range = string.scanForCodeRange();
    final int otherRange = other.scanForCodeRange();

    if (range == CR_7BIT) {
        if (otherRange == CR_7BIT || other.getByteList().getEncoding().isAsciiCompatible()) {
            return true;
        }
    }
    return otherRange == CR_7BIT && string.getByteList().getEncoding().isAsciiCompatible();
}
/**
 * Single-argument entry point for line enumeration. If {@code arg} is an options hash, the
 * separator defaults to the global "$/" and {@code arg} supplies the options; otherwise
 * {@code arg} is the separator and no options are passed.
 */
public static IRubyObject rbStrEnumerateLines(RubyString str, ThreadContext context, String name, IRubyObject arg, Block block, boolean wantarray) {
    final IRubyObject opts = ArgsUtil.getOptionsArg(context.runtime, arg);
    if (opts != context.nil) {
        final IRubyObject separator = context.runtime.getGlobalVariables().get("$/");
        return rbStrEnumerateLines(str, context, name, separator, opts, block, wantarray);
    }
    return rbStrEnumerateLines(str, context, name, arg, context.nil, block, wantarray);
}
// Sentinel index meaning "not set"; used by the paragraph-mode line enumerator for its
// subptr / eol cursors.
private static final int NULL_POINTER = -1;
/**
 * Splits {@code str} into lines on the separator {@code arg} and either collects them into
 * an array or yields them to {@code block}.
 *
 * @param str       string to split
 * @param context   current thread context
 * @param name      method name used when an enumerator has to be returned
 * @param arg       record separator; nil means "the whole string is one line"
 * @param opts      options hash (only the "chomp" keyword is read) or nil
 * @param block     block to yield lines to, if given
 * @param wantarray true to return an array of lines (ignored, with a warning, when a block is given)
 * @return the array of lines, the original string after yielding, or an enumerator when
 *         neither a block nor an array was requested
 */
public static IRubyObject rbStrEnumerateLines(RubyString str, ThreadContext context, String name, IRubyObject arg, IRubyObject opts, Block block, boolean wantarray) {
    Ruby runtime = context.runtime;
    Encoding enc; IRubyObject line, orig = str;
    int ptr, pend, subptr, subend, hit, adjusted;
    boolean rsnewline = false;
    boolean chomp = false;
    if (opts != context.nil) {
        IRubyObject _chomp = ArgsUtil.extractKeywordArg(context, "chomp", opts);
        // chomp only when the keyword is present AND truthy; "||" here used to throw a
        // NullPointerException for a missing keyword and to treat "chomp: false" as true
        chomp = _chomp != null && _chomp.isTrue();
    }
    if (block.isGiven()) {
        if (wantarray) {
            // a block wins over the array request, with a deprecation warning
            runtime.getWarnings().warning("passing a block to String#lines is deprecated");
            wantarray = false;
        }
    }
    else if (!wantarray) {
        return enumeratorize(runtime, str, name, Helpers.arrayOf(arg, opts));
    }
    if (arg == context.nil) {
        // nil separator: the whole string is a single line
        if (wantarray) return RubyArray.newArray(runtime, str);
        else {
            block.yieldSpecific(context, str);
            return orig;
        }
    }
    final RubyArray ary = wantarray ? RubyArray.newArray(runtime) : null;
    final IRubyObject defaultSep = runtime.getGlobalVariables().get("$/");
    RubyString rs = arg.convertToString();
    // iterate over a frozen snapshot so the block cannot pull the bytes out from under us
    str = str.newFrozen();
    byte[] strBytes = str.getByteList().unsafeBytes();
    ptr = subptr = str.getByteList().begin();
    final int len = str.size();
    pend = ptr + len;
    int rslen = rs.size();
    enc = (rs == defaultSep) ? str.getEncoding() : str.checkEncoding(rs);
    if (rslen == 0) {
        // empty separator selects paragraph mode
        rbStrEnumerateLinesEmptySep(str, context, chomp, block, ary, strBytes, ptr, len, pend, enc, subptr);
        return wantarray ? ary : orig;
    }
    ByteList rsByteList = rs.getByteList();
    byte[] rsbytes = rsByteList.unsafeBytes();
    int rsptr = rsByteList.begin();
    if (rsByteList.length() == enc.minLength() && enc.isNewLine(rsbytes, rsptr, rsByteList.length())) {
        rsnewline = true;
    }
    if (rs == defaultSep && !enc.isAsciiCompatible()) {
        // transcode the default separator into the string's (non-ASCII-compatible) encoding
        rs = RubyString.newString(runtime, rsbytes, rsptr, rslen);
        rs = (RubyString) EncodingUtils.rbStrEncode(context, rs, runtime.getEncodingService().convertEncodingToRubyEncoding(enc), 0, context.nil);
        rsByteList = rs.getByteList();
        rsbytes = rsByteList.unsafeBytes();
        rsptr = rsByteList.begin();
        rslen = rsByteList.realSize();
    }
    while (subptr < pend) {
        int pos = memsearch(rsbytes, rsptr, rslen, strBytes, subptr, pend - subptr, enc);
        if (pos < 0) break;
        hit = subptr + pos;
        // ignore separator hits that start inside a multibyte character
        adjusted = enc.rightAdjustCharHead(strBytes, subptr, hit, pend);
        if (hit != adjusted) {
            subptr = adjusted;
            continue;
        }
        subend = hit += rslen;
        if (chomp) {
            if (rsnewline) {
                subend = chomp_newline(strBytes, subptr, subend, enc);
            } else {
                subend -= rslen;
            }
        }
        line = str.substr(runtime, subptr - ptr, subend - subptr);
        if (wantarray) ary.append(line);
        else {
            block.yieldSpecific(context, line);
            str.modifyCheck(strBytes, len);
        }
        subptr = hit;
    }
    if (subptr != pend) {
        // trailing text after the last separator
        if (chomp) {
            if (rsnewline) {
                pend = chomp_newline(strBytes, subptr, pend, enc);
            } else if (pend - subptr >= rslen &&
                    ByteList.memcmp(strBytes, pend - rslen, rsbytes, rsptr, rslen) == 0) {
                pend -= rslen;
            }
        }
        line = str.substr(runtime, subptr - ptr, pend - subptr);
        if (wantarray) ary.append(line);
        else block.yieldSpecific(context, line);
    }
    return wantarray ? ary : orig;
}
/**
 * Line enumeration for an empty separator: text is split where newlines occur back to back,
 * and each resulting chunk is appended to {@code ary} (when non-null) or yielded to
 * {@code block}.
 *
 * @param strBytes backing bytes of {@code str}
 * @param ptr      index of the string's first byte in {@code strBytes}
 * @param len      byte length handed to {@code modifyCheck} after each yield
 * @param pend     index one past the string's last byte
 * @param subptr   index where the first chunk starts
 */
private static void rbStrEnumerateLinesEmptySep(RubyString str,
ThreadContext context, final boolean chomp, Block block, RubyArray ary,
final byte[] strBytes, final int ptr, final int len,
final int pend, final Encoding enc, int subptr) {
int[] n = {0};
// eol remembers the position just after the most recent newline inside a chunk
int eol = NULL_POINTER;
int subend = subptr;
while (subend < pend) {
int rslen;
do {
// n[0] is the width of a leading '\r' (0 when there is none)
if (EncodingUtils.encAscget(strBytes, subend, pend, n, enc) != '\r') n[0] = 0;
rslen = n[0] + length(enc, strBytes, subend + n[0], pend);
if (enc.isNewLine(strBytes, subend + n[0], pend)) {
// a newline directly after a previous newline ends the chunk
if (eol == subend) break;
subend += rslen;
if (subptr != NULL_POINTER) eol = subend;
}
else {
// first non-newline character starts a new chunk
if (subptr == NULL_POINTER) subptr = subend;
subend += rslen;
}
rslen = 0;
} while (subend < pend);
if (subptr == NULL_POINTER) break;
// rslen is non-zero only when the loop exited through the break above
RubyString line = str.makeSharedString(context.runtime, subptr - ptr,
subend - subptr + (chomp ? 0 : rslen));
if (ary != null) ary.append(line);
else {
block.yield(context, line);
str.modifyCheck(strBytes, len);
}
subptr = eol = NULL_POINTER;
}
}
/**
 * Returns the end index of {@code bytes[p, e)} with one trailing newline removed, together
 * with a '\r' immediately before it. Returns {@code e} unchanged when the last character is
 * not a newline.
 */
private static int chomp_newline(byte[] bytes, int p, int e, Encoding enc) {
    final int lastHead = enc.prevCharHead(bytes, p, e, e);
    if (!enc.isNewLine(bytes, lastHead, e)) {
        return e;
    }

    final int withoutNewline = lastHead;
    final int beforeHead = enc.prevCharHead(bytes, p, withoutNewline, withoutNewline);
    final boolean carriageReturn = beforeHead != -1
            && EncodingUtils.encAscget(bytes, beforeHead, withoutNewline, null, enc) == '\r';
    return carriageReturn ? beforeHead : withoutNewline;
}
/**
 * Searches for the {@code m}-byte pattern at {@code xBytes[x0..]} inside the {@code n}-byte
 * region at {@code yBytes[y0..]}, choosing a search strategy by pattern length and encoding.
 *
 * @return offset of the first match relative to {@code y0}, or -1 when there is none
 */
public static int memsearch(byte[] xBytes, int x0, int m, byte[] yBytes, int y0, int n, Encoding enc) {
    if (m > n) {
        return -1;
    }
    if (m == n) {
        return ByteList.memcmp(xBytes, x0, yBytes, y0, m) == 0 ? 0 : -1;
    }
    if (m < 1) {
        return 0;
    }
    if (m == 1) {
        // single byte: plain byte scan
        final int found = memchr(yBytes, y0, xBytes[x0], n);
        return found != -1 ? found - y0 : -1;
    }
    if (m <= 8) {
        return rb_memsearch_ss(xBytes, x0, m, yBytes, y0, n);
    }
    return enc == UTF8Encoding.INSTANCE
            ? rb_memsearch_qs_utf8(xBytes, x0, m, yBytes, y0, n)
            : rb_memsearch_qs(xBytes, x0, m, yBytes, y0, n);
}
/**
 * Core of the tr family: translates the characters of {@code self} that are listed in
 * {@code srcStr} into the corresponding characters of {@code replStr}, in place.
 *
 * @param self    string to transliterate; modified
 * @param srcStr  source specification; a leading '^' (when more characters follow) negates it
 * @param replStr replacement specification
 * @param sflag   true to also squeeze runs of the same translated character into one
 * @return {@code self} if anything was changed, otherwise null
 * @throws IllegalArgumentException if a specification contains an invalid range (raised by trNext)
 */
public static CodeRangeable trTransHelper(CodeRangeable self, CodeRangeable srcStr, CodeRangeable replStr, boolean sflag) {
final ByteList srcList = srcStr.getByteList();
final ByteList replList = replStr.getByteList();
int cr = self.getCodeRange();
Encoding e1 = self.checkEncoding(srcStr);
Encoding e2 = self.checkEncoding(replStr);
Encoding enc = e1 == e2 ? e1 : srcStr.checkEncoding(replStr);
final StringSupport.TR trSrc = new StringSupport.TR(srcList);
boolean cflag = false;
int[] l = {0};
// a leading '^' followed by at least one more byte selects the complement of the source set
if (srcStr.getByteList().getRealSize() > 1 &&
EncodingUtils.encAscget(trSrc.buf, trSrc.p, trSrc.pend, l, enc) == '^' &&
trSrc.p + 1 < trSrc.pend){
cflag = true;
trSrc.p++;
}
int c, c0, last = 0;
// trans[] maps code points below TRANS_SIZE to their replacement, -1 meaning "no mapping";
// larger code points go through the lazily created hash
final int[]trans = new int[StringSupport.TRANS_SIZE];
final StringSupport.TR trRepl = new StringSupport.TR(replList);
boolean modify = false;
IntHash<Integer> hash = null;
boolean singlebyte = StringSupport.isSingleByteOptimizable(self, EncodingUtils.STR_ENC_GET(self));
if (cflag) {
// complement mode: start with everything mapped, then knock out the listed characters
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
trans[i] = 1;
}
while ((c = StringSupport.trNext(trSrc, enc)) != -1) {
if (c < StringSupport.TRANS_SIZE) {
trans[c] = -1;
} else {
if (hash == null) hash = new IntHash<Integer>();
hash.put(c, 1);
}
}
// drain the replacement spec: in complement mode only its last character is used
while ((c = StringSupport.trNext(trRepl, enc)) != -1) {}
last = trRepl.now;
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
if (trans[i] != -1) {
trans[i] = last;
}
}
} else {
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
trans[i] = -1;
}
// pair source and replacement characters; a shorter replacement repeats its last character
while ((c = StringSupport.trNext(trSrc, enc)) != -1) {
int r = StringSupport.trNext(trRepl, enc);
if (r == -1) r = trRepl.now;
if (c < StringSupport.TRANS_SIZE) {
trans[c] = r;
// a multibyte replacement rules out the byte-for-byte fast path
if (codeLength(enc, r) != 1) singlebyte = false;
} else {
if (hash == null) hash = new IntHash<Integer>();
hash.put(c, r);
}
}
}
// optimistically assume 7-bit; downgraded to CR_VALID when a non-ASCII character is written
if (cr == CR_VALID && enc.isAsciiCompatible()) {
cr = CR_7BIT;
}
self.modifyAndKeepCodeRange();
int s = self.getByteList().getBegin();
int send = s + self.getByteList().getRealSize();
byte sbytes[] = self.getByteList().getUnsafeBytes();
if (sflag) {
// squeezing variant: output is built in a separate buffer, dropping repeats of the
// same translated character (tracked in save)
int clen, tlen;
int max = self.getByteList().getRealSize();
int save = -1;
byte[] buf = new byte[max];
int t = 0;
while (s < send) {
boolean mayModify = false;
c0 = c = codePoint(e1, sbytes, s, send);
clen = codeLength(e1, c);
tlen = enc == e1 ? clen : codeLength(enc, c);
s += clen;
if (c < TRANS_SIZE) {
c = trCode(c, trans, hash, cflag, last, false);
} else if (hash != null) {
Integer tmp = hash.get(c);
if (tmp == null) {
if (cflag) {
c = last;
} else {
c = -1;
}
} else if (cflag) {
c = -1;
} else {
c = tmp;
}
} else {
c = -1;
}
if (c != -1) {
if (save == c) {
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
continue;
}
save = c;
tlen = codeLength(enc, c);
modify = true;
} else {
// untranslated character: copy it through and reset the squeeze tracker
save = -1;
c = c0;
if (enc != e1) mayModify = true;
}
// grow the output buffer until the character fits
while (t + tlen >= max) {
max *= 2;
buf = Arrays.copyOf(buf, max);
}
enc.codeToMbc(c, buf, t);
if (mayModify && (s >= send || ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
t += tlen;
}
self.getByteList().setUnsafeBytes(buf);
self.getByteList().setRealSize(t);
} else if (enc.isSingleByte() || (singlebyte && hash == null)) {
// fast path: every character is one byte before and after, so translate in place
while (s < send) {
c = sbytes[s] & 0xff;
if (trans[c] != -1) {
if (!cflag) {
c = trans[c];
sbytes[s] = (byte)c;
} else {
sbytes[s] = (byte)last;
}
modify = true;
}
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
s++;
}
} else {
// general multibyte path: translate character by character into a new buffer
int clen, tlen, max = (int)(self.getByteList().realSize() * 1.2);
byte[] buf = new byte[max];
int t = 0;
while (s < send) {
boolean mayModify = false;
c0 = c = codePoint(e1, sbytes, s, send);
clen = codeLength(e1, c);
tlen = enc == e1 ? clen : codeLength(enc, c);
if (c < TRANS_SIZE) {
c = trans[c];
} else if (hash != null) {
Integer tmp = hash.get(c);
if (tmp == null) {
if (cflag) {
c = last;
} else {
c = -1;
}
} else if (cflag) {
c = -1;
} else {
c = tmp;
}
}
else {
c = cflag ? last : -1;
}
if (c != -1) {
tlen = codeLength(enc, c);
modify = true;
} else {
c = c0;
if (enc != e1) mayModify = true;
}
// grow the output buffer until the character fits
while (t + tlen >= max) {
max <<= 1;
buf = Arrays.copyOf(buf, max);
}
enc.codeToMbc(c, buf, t);
// re-encoding under a different encoding may change the bytes even without a mapping
if (mayModify && ByteList.memcmp(sbytes, s, buf, t, tlen) != 0) {
modify = true;
}
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
s += clen;
t += tlen;
}
self.getByteList().setUnsafeBytes(buf);
self.getByteList().setRealSize(t);
}
if (modify) {
if (cr != CR_BROKEN) self.setCodeRange(cr);
StringSupport.associateEncoding(self, enc);
return self;
}
return null;
}
/**
 * Runtime-aware variant of {@code trTransHelper}: an {@link IllegalArgumentException} from
 * the runtime-free overload is rethrown as a Ruby ArgumentError with the same message.
 */
public static CodeRangeable trTransHelper(Ruby runtime, CodeRangeable self, CodeRangeable srcStr, CodeRangeable replStr, boolean sflag) {
    final CodeRangeable outcome;
    try {
        outcome = trTransHelper(self, srcStr, replStr, sflag);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return outcome;
}
/**
 * Looks up the translation of code point {@code c}.
 *
 * @param trans direct table for code points below TRANS_SIZE
 * @param hash  table for larger code points, may be null
 * @param cflag true when the source set is complemented
 * @param last  replacement used for complemented matches
 * @param set   whether a complemented lookup without a hash yields {@code last}
 * @return the replacement code point, or -1 when {@code c} is not translated
 */
private static int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cflag, int last, boolean set) {
    if (c < StringSupport.TRANS_SIZE) {
        return trans[c];
    }
    if (hash == null) {
        return (cflag && set) ? last : -1;
    }

    final Integer mapped = hash.get(c);
    if (mapped != null) {
        // listed in the source set: excluded when complemented, translated otherwise
        return cflag ? -1 : mapped;
    }
    return cflag ? last : -1;
}
/**
 * Compares two byte lists character by character, ignoring ASCII case. Non-ASCII characters
 * are compared through {@code caseCmp} on their raw bytes.
 *
 * @return -1, 0 or 1 as {@code value} sorts before, equal to or after {@code otherValue}
 */
public static int multiByteCasecmp(Encoding enc, ByteList value, ByteList otherValue) {
    final byte[] bytes = value.getUnsafeBytes();
    int p = value.getBegin();
    final int end = p + value.getRealSize();

    final byte[] obytes = otherValue.getUnsafeBytes();
    int op = otherValue.getBegin();
    final int oend = op + otherValue.getRealSize();

    while (p < end && op < oend) {
        final boolean asciiCompatible = enc.isAsciiCompatible();
        final int c = asciiCompatible ? bytes[p] & 0xff : preciseCodePoint(enc, bytes, p, end);
        final int oc = asciiCompatible ? obytes[op] & 0xff : preciseCodePoint(enc, obytes, op, oend);

        final int cl, ocl;
        if (Encoding.isAscii(c) && Encoding.isAscii(oc)) {
            // both ASCII: compare upper-cased values
            final int upper = AsciiTables.ToUpperCaseTable[c];
            final int otherUpper = AsciiTables.ToUpperCaseTable[oc];
            if (upper != otherUpper) {
                return upper < otherUpper ? -1 : 1;
            }
            if (enc.isAsciiCompatible()) {
                cl = ocl = 1;
            } else {
                cl = preciseLength(enc, bytes, p, end);
                ocl = preciseLength(enc, obytes, op, oend);
            }
        } else {
            cl = length(enc, bytes, p, end);
            ocl = length(enc, obytes, op, oend);
            final int shorter = cl < ocl ? cl : ocl;
            final int ret = caseCmp(bytes, p, obytes, op, shorter);
            if (ret != 0) return ret < 0 ? -1 : 1;
            if (cl != ocl) return cl < ocl ? -1 : 1;
        }
        p += cl;
        op += ocl;
    }

    // one side ran out: the one with bytes left over is the greater
    final int left = end - p;
    final int right = oend - op;
    if (left == right) return 0;
    return left > right ? 1 : -1;
}
/**
 * Collapses, in place, runs of an identical byte into a single byte for every byte value
 * flagged in {@code squeeze}.
 *
 * @return true if the byte list became shorter
 */
public static boolean singleByteSqueeze(ByteList value, boolean squeeze[]) {
    final byte[] bytes = value.getUnsafeBytes();
    final int limit = value.getBegin() + value.getRealSize();
    int write = value.getBegin();
    int previous = -1;

    for (int read = value.getBegin(); read < limit; read++) {
        final int c = bytes[read] & 0xff;
        // drop a byte only when it repeats the previous one and is flagged for squeezing
        if (c == previous && squeeze[c]) continue;
        previous = c;
        bytes[write++] = (byte) c;
    }

    final int newSize = write - value.getBegin();
    if (newSize == value.getRealSize()) {
        return false;
    }
    value.setRealSize(newSize);
    return true;
}
/**
 * Multibyte-aware squeeze: collapses, in place, runs of the same character. When
 * {@code isArg} is true only characters selected by the squeeze tables are collapsed;
 * otherwise every repeated character is.
 *
 * @return true if the byte list became shorter
 */
public static boolean multiByteSqueeze(ByteList value, boolean squeeze[], TrTables tables, Encoding enc, boolean isArg) {
    final byte[] bytes = value.getUnsafeBytes();
    int read = value.getBegin();
    int write = read;
    final int limit = read + value.getRealSize();
    int previous = -1;

    while (read < limit) {
        final int c;
        if (enc.isAsciiCompatible() && (bytes[read] & 0xff) < 0x80) {
            // ASCII fast path: one byte, looked up directly
            c = bytes[read] & 0xff;
            final boolean keep = c != previous || (isArg && !squeeze[c]);
            if (keep) {
                previous = c;
                bytes[write++] = (byte) c;
            }
            read++;
        } else {
            c = codePoint(enc, bytes, read, limit);
            final int cl = codeLength(enc, c);
            final boolean keep = c != previous || (isArg && !trFind(c, squeeze, tables));
            if (keep) {
                if (write != read) enc.codeToMbc(c, bytes, write);
                previous = c;
                write += cl;
            }
            read += cl;
        }
    }

    final int newSize = write - value.getBegin();
    if (newSize == value.getRealSize()) {
        return false;
    }
    value.setRealSize(newSize);
    return true;
}
/**
 * Runtime-aware variant of {@code multiByteSqueeze}: an {@link IllegalArgumentException}
 * from the runtime-free overload is rethrown as a Ruby ArgumentError with the same message.
 */
public static boolean multiByteSqueeze(Ruby runtime, ByteList value, boolean squeeze[], TrTables tables, Encoding enc, boolean isArg) {
    final boolean shortened;
    try {
        shortened = multiByteSqueeze(value, squeeze, tables, enc, isArg);
    } catch (IllegalArgumentException iae) {
        throw runtime.newArgumentError(iae.getMessage());
    }
    return shortened;
}
/**
 * Short-pattern search: delegates to {@code memmem} and converts its absolute index into an
 * offset relative to {@code ys}.
 *
 * @return offset of the first match relative to {@code ys}, or -1 when there is none
 */
private static int rb_memsearch_ss(byte[] xsBytes, int xs, int m, byte[] ysBytes, int ys, int n) {
    final int found = memmem(ysBytes, ys, n, xsBytes, xs, m);
    return found == -1 ? -1 : found - ys;
}
/**
 * Finds the first occurrence of a byte pattern inside a byte region using the
 * Knuth-Morris-Pratt algorithm.
 *
 * @param aBytes array containing the region to search
 * @param aStart index of the region's first byte
 * @param aLen   length of the region
 * @param p      array containing the pattern
 * @param pStart index of the pattern's first byte
 * @param pLen   length of the pattern
 * @return the absolute index in {@code aBytes} of the first match, {@code aStart} for an
 *         empty pattern, or -1 when the pattern does not occur
 */
public static int memmem(byte[] aBytes, int aStart, int aLen, byte[] p, int pStart, int pLen) {
    // An empty pattern matches at the very start. Without this guard the loop below would
    // read p[pStart] and index an empty failure table.
    if (pLen == 0) return aStart;

    int[] f = failure(p, pStart, pLen);
    int j = 0; // number of pattern bytes currently matched
    for (int i = 0; i < aLen; i++) {
        // on mismatch fall back to the longest proper prefix that is also a suffix
        while (j > 0 && p[pStart + j] != aBytes[aStart + i]) j = f[j - 1];
        if (p[pStart + j] == aBytes[aStart + i]) j++;
        if (j == pLen) return aStart + i - pLen + 1;
    }
    return -1;
}

/**
 * Builds the KMP failure table: {@code f[i]} is the length of the longest proper prefix of
 * the pattern that is also a suffix of its first {@code i + 1} bytes.
 */
private static int[] failure(byte[] p, int pStart, int pLen) {
    int[] f = new int[pLen];
    int j = 0;
    for (int i = 1; i < pLen; i++) {
        while (j > 0 && p[pStart + j] != p[pStart + i]) j = f[j - 1];
        if (p[pStart + j] == p[pStart + i]) j++;
        f[i] = j;
    }
    return f;
}
/**
 * Quick-search (Sunday) byte search for the {@code m}-byte pattern at {@code xsBytes[xs..]}
 * inside the {@code n}-byte region at {@code ysBytes[ys..]}.
 *
 * @return offset of the first match relative to {@code ys}, or -1 when there is none
 */
private static int rb_memsearch_qs(byte[] xsBytes, int xs, int m, byte[] ysBytes, int ys, int n) {
    final int xe = xs + m;
    final int yEnd = ys + n;

    // shift table: distance from a byte's last occurrence in the pattern to the pattern end
    final int[] qstable = new int[256];
    Arrays.fill(qstable, m + 1);
    for (int x = xs; x < xe; ++x) {
        qstable[xsBytes[x] & 0xFF] = xe - x;
    }

    int y = ys;
    while (y + m <= yEnd) {
        if (xsBytes[xs] == ysBytes[y] && ByteList.memcmp(xsBytes, xs, ysBytes, y, m) == 0) {
            return y - ys;
        }
        // The window already touches the end of the region, so no later window can fit.
        // Stop here instead of reading ysBytes[yEnd], which lies outside the region and may
        // be outside the array.
        if (y + m >= yEnd) break;
        y += qstable[ysBytes[y + m] & 0xFF];
    }
    return -1;
}
/**
 * Hashes the UTF-8 sequence starting at {@code xBytes[x]} into an index for the 512-entry
 * quick-search shift table. Single bytes (below 0xC0, or 0xF5 and above) map to
 * {@code byte + 256}; multibyte lead bytes are mixed with their continuation bytes and
 * reduced to 0..255.
 *
 * Bytes at or beyond the end of the array are treated as 0, so a truncated sequence at the
 * end of the buffer hashes cleanly instead of throwing.
 *
 * @param xBytes buffer to read from
 * @param x      index of the lead byte; may equal {@code xBytes.length}
 * @return a table index in the range 0..511
 */
private static int rb_memsearch_qs_utf8_hash(byte[] xBytes, final int x) {
    final int mix = 8353;
    final int lead = x < xBytes.length ? xBytes[x] & 0xFF : 0;

    // number of continuation bytes announced by the lead byte
    final int trailing;
    if (lead < 0xC0) {
        return lead + 256;
    } else if (lead < 0xE0) {
        trailing = 1;
    } else if (lead < 0xF0) {
        trailing = 2;
    } else if (lead < 0xF5) {
        trailing = 3;
    } else {
        return lead + 256;
    }

    int h = lead;
    for (int i = 1; i <= trailing; i++) {
        h *= mix;
        // a missing continuation byte contributes 0
        if (x + i < xBytes.length) h += xBytes[x + i] & 0xff;
    }
    return ((byte) h) & 0xff;
}
/**
 * Quick-search variant for UTF-8 data: shift distances are keyed by a hash of the UTF-8
 * sequence following the current window rather than by a single byte.
 *
 * @return offset of the first match relative to {@code ys}, or -1 when there is none
 */
private static int rb_memsearch_qs_utf8(byte[] xsBytes, int xs, int m, byte[] ysBytes, int ys, int n) {
    final int xe = xs + m;
    final int yEnd = ys + n;

    // shift table indexed by the sequence hash (0..511)
    final int[] qstable = new int[512];
    Arrays.fill(qstable, m + 1);
    for (int x = xs; x < xe; ++x) {
        qstable[rb_memsearch_qs_utf8_hash(xsBytes, x)] = xe - x;
    }

    int y = ys;
    while (y + m <= yEnd) {
        if (xsBytes[xs] == ysBytes[y] && ByteList.memcmp(xsBytes, xs, ysBytes, y, m) == 0) {
            return y - ys;
        }
        // The window already touches the end of the region, so no later window can fit.
        // Stop here instead of hashing the bytes at yEnd, which lie outside the region.
        if (y + m >= yEnd) break;
        y += qstable[rb_memsearch_qs_utf8_hash(ysBytes, y + m)];
    }
    return -1;
}
/**
 * Validates a pair of case-mapping options and merges them into {@code flags}.
 *
 * The only accepted pairs are {@code :turkic} with {@code :lithuanian}, in either order.
 *
 * @param runtime the current runtime, used to create symbols and errors
 * @param arg0    first option
 * @param arg1    second option
 * @param flags   case-mapping flags collected so far
 * @return {@code flags} with both the Turkic and Lithuanian bits set
 * @throws org.jruby.exceptions.RaiseException an ArgumentError "invalid option" when the first
 *         option is neither symbol, or "invalid second option" when the second option is not
 *         the counterpart of the first
 */
public static int checkCaseMapOptions(Ruby runtime, IRubyObject arg0, IRubyObject arg1, int flags) {
    final RubySymbol turkicSym = runtime.newSymbol("turkic");
    final RubySymbol lithuanianSym = runtime.newSymbol("lithuanian");

    final boolean firstIsTurkic = arg0.equals(turkicSym);
    if (!firstIsTurkic && !arg0.equals(lithuanianSym)) {
        throw runtime.newArgumentError("invalid option");
    }

    // Whichever symbol came first, the second one has to be the other.
    final RubySymbol requiredSecond = firstIsTurkic ? lithuanianSym : turkicSym;
    if (!arg1.equals(requiredSecond)) {
        throw runtime.newArgumentError("invalid second option");
    }

    return flags | Config.CASE_FOLD_TURKISH_AZERI | Config.CASE_FOLD_LITHUANIAN;
}
/**
 * Validates a single case-mapping option and merges it into {@code flags}.
 *
 * Accepted options are {@code :ascii}, {@code :fold}, {@code :turkic} and
 * {@code :lithuanian}. {@code :fold} is accepted only when, of the upcase and downcase
 * bits, exactly the downcase bit is set; it then toggles the fold and downcase bits.
 *
 * @param runtime the current runtime, used to create symbols and errors
 * @param arg0    the option
 * @param flags   case-mapping flags collected so far
 * @return the updated flags
 * @throws org.jruby.exceptions.RaiseException an ArgumentError for an unknown option, or for
 *         {@code :fold} outside of downcasing
 */
public static int checkCaseMapOptions(Ruby runtime, IRubyObject arg0, int flags) {
    if (arg0.equals(runtime.newSymbol("ascii"))) {
        return flags | Config.CASE_ASCII_ONLY;
    }

    if (arg0.equals(runtime.newSymbol("fold"))) {
        final int direction = flags & (Config.CASE_UPCASE | Config.CASE_DOWNCASE);
        if (direction != Config.CASE_DOWNCASE) {
            throw runtime.newArgumentError("option :fold only allowed for downcasing");
        }
        return flags ^ (Config.CASE_FOLD | Config.CASE_DOWNCASE);
    }

    if (arg0.equals(runtime.newSymbol("turkic"))) {
        return flags | Config.CASE_FOLD_TURKISH_AZERI;
    }

    if (arg0.equals(runtime.newSymbol("lithuanian"))) {
        return flags | Config.CASE_FOLD_LITHUANIAN;
    }

    throw runtime.newArgumentError("invalid option");
}
/**
 * Node in a singly linked chain of byte buffers.
 */
private static final class MappingBuffer {
    /** Storage owned by this node; {@code null} when created through the no-argument constructor. */
    final byte[] bytes;

    /** Byte count recorded by the code that fills {@link #bytes}. */
    int used;

    /** The node that follows this one, or {@code null} at the end of the chain. */
    MappingBuffer next;

    /** Creates a node without storage. */
    MappingBuffer() {
        this.bytes = null;
    }

    /**
     * Creates a node with freshly allocated storage.
     *
     * @param size length of the byte array to allocate
     */
    MappingBuffer(int size) {
        this.bytes = new byte[size];
    }
}
// Extra bytes added on top of the computed size of every output buffer that caseMap allocates.
private static final int CASE_MAPPING_ADDITIONAL_LENGTH = 20;
/**
 * Applies the encoding's case mapping to {@code src} and returns the result as a new
 * {@link ByteList}.
 *
 * The input is consumed in passes. Each pass hands {@code enc.caseMap} a fresh output buffer
 * sized from the bytes still unread (scaled by the pass number) plus
 * {@code CASE_MAPPING_ADDITIONAL_LENGTH}, and passes repeat until the read position reaches
 * the end of the input. The buffers are then joined into one result.
 *
 * @param runtime the current runtime, used to raise errors
 * @param src     bytes to map; not modified by this method's own code
 * @param flags   case-mapping flags handed to {@code enc.caseMap}
 * @param enc     encoding that performs the mapping and is set on the result
 * @return a new {@link ByteList} holding the mapped bytes
 * @throws org.jruby.exceptions.RaiseException an ArgumentError "input string invalid" when
 *         {@code enc.caseMap} returns a negative length
 */
public static ByteList caseMap(Ruby runtime, ByteList src, IntHolder flags, Encoding enc) {
    IntHolder pp = new IntHolder();
    pp.value = src.getBegin();
    int end = src.getRealSize() + pp.value;
    byte[] bytes = src.getUnsafeBytes();
    int tgtLen = 0;
    int buffers = 0;
    MappingBuffer root = new MappingBuffer(); // placeholder head; real buffers hang off root.next
    MappingBuffer buffer = root;

    while (pp.value < end) {
        buffer.next = new MappingBuffer((end - pp.value) * ++buffers + CASE_MAPPING_ADDITIONAL_LENGTH);
        buffer = buffer.next;
        int len = enc.caseMap(flags, bytes, pp, end, buffer.bytes, 0, buffer.bytes.length);
        if (len < 0) throw runtime.newArgumentError("input string invalid");
        buffer.used = len;
        tgtLen += len;
    }

    if (buffers == 1) {
        // Everything fit in the first buffer: wrap it directly.
        return new ByteList(buffer.bytes, 0, buffer.used, enc, false);
    }

    // Zero or several buffers: concatenate into one array and wrap it with an explicit
    // length. Going through the capacity-only ByteList(int) constructor would leave the
    // reported size at zero, so the copied bytes would not be visible to callers.
    final byte[] joined = new byte[tgtLen];
    int tgtPos = 0;
    for (buffer = root.next; buffer != null; buffer = buffer.next) {
        System.arraycopy(buffer.bytes, 0, joined, tgtPos, buffer.used);
        tgtPos += buffer.used;
    }
    return new ByteList(joined, 0, tgtLen, enc, false);
}
/**
 * Case-maps {@code value} in place using {@link ASCIIEncoding}'s case mapping.
 *
 * The same backing array is passed as both input and output, so the mapped bytes overwrite
 * the original ones. An empty {@code value} is left untouched.
 *
 * @param runtime the current runtime, used to raise errors
 * @param value   bytes to map in place
 * @param flags   case-mapping flags handed to the encoding
 * @param enc     not used by this method
 * @throws org.jruby.exceptions.RaiseException an ArgumentError "input string invalid" when the
 *         encoding's case mapping returns a negative length
 */
public static void asciiOnlyCaseMap(Ruby runtime, ByteList value, IntHolder flags, Encoding enc) {
    final int size = value.getRealSize();
    if (size == 0) {
        return;
    }

    final int begin = value.getBegin();
    final int limit = begin + size;
    final byte[] buf = value.getUnsafeBytes();

    final IntHolder cursor = new IntHolder();
    cursor.value = begin;

    final int mapped = ASCIIEncoding.INSTANCE.caseMap(flags, buf, cursor, limit, buf, begin, limit);
    if (mapped < 0) {
        throw runtime.newArgumentError("input string invalid");
    }
}
/**
 * Tests whether exactly one of the {@code CR_7BIT} and {@code CR_VALID} bits is set in a
 * code range value.
 *
 * {@code CR_VALID} is the bit directly above {@code CR_7BIT}, so shifting right by one lines
 * it up with {@code CR_7BIT}; XOR then leaves that bit set only when the two differ.
 *
 * @param cr code range flags
 * @return {@code CR_7BIT} when exactly one of the two bits is set, otherwise 0
 */
public static int encCoderangeClean(int cr) {
    final int validAlignedTo7bit = cr >> 1;
    final int differing = cr ^ validAlignedTo7bit;
    return differing & CR_7BIT;
}
/**
 * Decodes the contents of a {@link ByteList} into a Java {@link String}.
 *
 * The charset reported by the list's encoding is used when it is available. When the
 * encoding has no charset, or looking it up throws {@link UnsupportedCharsetException},
 * the bytes are decoded with the platform default charset instead.
 *
 * @param bytes the bytes to decode
 * @return the decoded string
 */
public static String byteListAsString(ByteList bytes) {
    Charset resolved;
    try {
        resolved = bytes.getEncoding().getCharset();
    } catch (UnsupportedCharsetException ignored) {
        // No usable Java charset for this encoding: fall through to the default decoding.
        resolved = null;
    }

    if (resolved == null) {
        return new String(bytes.unsafeBytes(), bytes.begin(), bytes.realSize());
    }
    return new String(bytes.unsafeBytes(), bytes.begin(), bytes.realSize(), resolved);
}
/**
 * Reports whether the given encoding is a Unicode encoding.
 *
 * @param enc the encoding to inspect
 * @return the value of {@code enc.isUnicode()}
 * @deprecated call {@code isUnicode()} on the encoding directly
 */
@Deprecated
public static boolean isUnicode(Encoding enc) {
    final boolean unicode = enc.isUnicode();
    return unicode;
}
}