package org.jcodings.specific;
import org.jcodings.Config;
import org.jcodings.IntHolder;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.exception.ErrorCodes;
import org.jcodings.unicode.UnicodeEncoding;
/**
 * Common UTF-8 behaviour shared by the concrete UTF-8 encodings in this package.
 *
 * <p>Code points are carried in an {@code int}. When {@link #USE_INVALID_CODE_SCHEME}
 * is enabled, the lone bytes {@code 0xFE} and {@code 0xFF} are mapped to the
 * sentinel code values {@code 0xfffffffe} and {@code 0xffffffff} so that they
 * survive a round trip through {@link #mbcToCode} and {@link #codeToMbc}.
 */
abstract class BaseUTF8Encoding extends UnicodeEncoding {
    /** Enables the sentinel-code mapping for the bytes {@code 0xFE} and {@code 0xFF}. */
    static final boolean USE_INVALID_CODE_SCHEME = true;

    /** Sentinel code value standing in for the single byte {@code 0xFE}. */
    private static final int INVALID_CODE_FE = 0xfffffffe;
    /** Sentinel code value standing in for the single byte {@code 0xFF}. */
    private static final int INVALID_CODE_FF = 0xffffffff;
    /** Largest code value that is encoded as a regular (four byte) sequence. */
    private static final int VALID_CODE_LIMIT = 0x0010ffff;

    /**
     * Creates the encoding under the name "UTF-8" with a 1..4 byte character
     * length and flags it as UTF-8.
     *
     * @param EncLen table handed through to the superclass constructor
     * @param Trans  table handed through to the superclass constructor
     */
    protected BaseUTF8Encoding(int[]EncLen, int[][]Trans) {
        super("UTF-8", 1, 4, EncLen, Trans);
        isUTF8 = true;
    }

    /** @return the charset name, always {@code "UTF-8"} */
    @Override
    public String getCharsetName() {
        return "UTF-8";
    }

    /**
     * Tests whether a line terminator starts at {@code bytes[p]}.
     *
     * <p>LF always counts. When {@code Config.USE_UNICODE_ALL_LINE_TERMINATORS}
     * is set, CR (unless {@code Config.USE_CRNL_AS_LINE_TERMINATOR} is set),
     * the sequence {@code C2 85} (U+0085) and the sequences {@code E2 80 A8} /
     * {@code E2 80 A9} (U+2028 / U+2029) count as well.
     *
     * @param bytes buffer to inspect
     * @param p     index of the byte to test
     * @param end   exclusive upper bound for reads
     * @return {@code true} if a recognised line terminator starts at {@code p}
     */
    @Override
    public boolean isNewLine(byte[]bytes, int p, int end) {
        if (p >= end) return false;

        final byte first = bytes[p];
        if (first == (byte)0x0a) return true;
        if (!Config.USE_UNICODE_ALL_LINE_TERMINATORS) return false;

        if (!Config.USE_CRNL_AS_LINE_TERMINATOR && first == (byte)0x0d) return true;

        // Two-byte terminator: C2 85.
        if (p + 1 >= end) return false;
        final byte second = bytes[p + 1];
        if (second == (byte)0x85 && first == (byte)0xc2) return true;

        // Three-byte terminators: E2 80 A8 and E2 80 A9.
        if (p + 2 >= end) return false;
        final byte third = bytes[p + 2];
        return (third == (byte)0xa8 || third == (byte)0xa9)
                && second == (byte)0x80
                && first == (byte)0xe2;
    }

    /**
     * Returns how many bytes {@link #codeToMbc} writes for {@code code}.
     *
     * @param code code value, interpreted as unsigned
     * @return 1 to 4 for codes up to {@code 0x10ffff}, 1 for the two sentinel
     *         codes when the invalid-code scheme is enabled, otherwise
     *         {@code ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE}
     */
    @Override
    public int codeToMbcLength(int code) {
        if ((code & 0xffffff80) == 0) return 1;
        if ((code & 0xfffff800) == 0) return 2;
        if ((code & 0xffff0000) == 0) return 3;
        // Widen to long so that codes with the top bit set compare as unsigned.
        if ((code & 0xFFFFFFFFL) <= VALID_CODE_LIMIT) return 4;
        if (USE_INVALID_CODE_SCHEME && (code == INVALID_CODE_FE || code == INVALID_CODE_FF)) return 1;
        return ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE;
    }

    /**
     * Decodes the character starting at {@code bytes[p]}.
     *
     * <p>The number of bytes consumed is whatever {@code length(bytes, p, end)}
     * reports. If that is greater than one, the payload bits of the lead byte
     * and the low six bits of every following byte are concatenated. Otherwise
     * the lead byte itself is returned, except that {@code 0xFE} / {@code 0xFF}
     * become their sentinel codes when the invalid-code scheme is enabled.
     *
     * @param bytes buffer to decode from
     * @param p     index of the lead byte
     * @param end   bound passed on to {@code length}
     * @return the decoded code value
     */
    @Override
    public int mbcToCode(byte[]bytes, int p, int end) {
        final int charLen = length(bytes, p, end);
        final int lead = bytes[p] & 0xff;

        if (charLen <= 1) {
            if (USE_INVALID_CODE_SCHEME && lead > 0xfd) {
                return lead == 0xfe ? INVALID_CODE_FE : INVALID_CODE_FF;
            }
            return lead;
        }

        final int trailing = charLen - 1;
        // The lead byte keeps (6 - trailing) payload bits.
        int code = lead & ((1 << (6 - trailing)) - 1);
        for (int i = 1; i <= trailing; i++) {
            code = (code << 6) | (bytes[p + i] & 0x3f);
        }
        return code;
    }

    /**
     * Builds a continuation byte from the six bits of {@code code} found at
     * bit offset {@code shift}.
     */
    static byte trailS(int code, int shift) {
        final int sixBits = (code >>> shift) & 0x3f;
        return (byte)(sixBits | 0x80);
    }

    /** Builds a continuation byte from the lowest six bits of {@code code}. */
    static byte trail0(int code) {
        final int sixBits = code & 0x3f;
        return (byte)(sixBits | 0x80);
    }

    /**
     * Encodes {@code code} into {@code bytes} starting at index {@code p}.
     *
     * @param code  code value to encode
     * @param bytes destination buffer
     * @param p     index of the first byte to write
     * @return the number of bytes written (1 to 4), or
     *         {@code ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE} if the code is
     *         neither in range nor one of the sentinel codes; nothing is
     *         written in that case
     */
    @Override
    public int codeToMbc(int code, byte[]bytes, int p) {
        if ((code & 0xffffff80) == 0) {
            bytes[p] = (byte)code;
            return 1;
        }

        int out = p;
        if ((code & 0xfffff800) == 0) {
            bytes[out++] = (byte)(((code >>> 6) & 0x1f) | 0xc0);
        } else if ((code & 0xffff0000) == 0) {
            bytes[out++] = (byte)(((code >>> 12) & 0x0f) | 0xe0);
            bytes[out++] = trailS(code, 6);
        } else if ((code & 0xFFFFFFFFL) <= VALID_CODE_LIMIT) {
            bytes[out++] = (byte)(((code >>> 18) & 0x07) | 0xf0);
            bytes[out++] = trailS(code, 12);
            bytes[out++] = trailS(code, 6);
        } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FE) {
            bytes[out] = (byte)0xfe;
            return 1;
        } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FF) {
            bytes[out] = (byte)0xff;
            return 1;
        } else {
            return ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE;
        }

        // Every multi-byte form ends with the low six bits of the code.
        bytes[out++] = trail0(code);
        return out - p;
    }

    /**
     * Case-folds the character at {@code bytes[pp.value]} into {@code fold}.
     *
     * <p>ASCII bytes are handled here: they are lower-cased through
     * {@code AsciiTables.ToLowerCaseTable}, except that {@code 0x49} is folded
     * to the two bytes {@code C4 B1} when Turkish/Azeri folding is compiled in
     * and requested through {@code flag}. In both cases {@code pp.value} is
     * advanced by one. Everything else is delegated to the superclass.
     *
     * @param flag  case-fold option bits
     * @param bytes source buffer
     * @param pp    holder of the current read index
     * @param end   bound passed on to the superclass
     * @param fold  destination for the folded bytes, written from index 0
     * @return the number of bytes stored in {@code fold}
     */
    @Override
    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
        final byte current = bytes[pp.value];
        if (!isMbcAscii(current)) {
            return super.mbcCaseFold(flag, bytes, pp, end, fold);
        }

        if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI
                && (flag & Config.CASE_FOLD_TURKISH_AZERI) != 0
                && current == (byte)0x49) {
            fold[0] = (byte)0xc4;
            fold[1] = (byte)0xb1;
            pp.value++;
            return 2;
        }

        fold[0] = AsciiTables.ToLowerCaseTable[current & 0xff];
        pp.value++;
        return 1;
    }

    /**
     * Stores {@code 0x80} in {@code sbOut} and returns the superclass code
     * range table for {@code ctype}.
     *
     * @param ctype character type to look up
     * @param sbOut receives the value {@code 0x80}
     * @return the result of {@code super.ctypeCodeRange(ctype)}
     */
    @Override
    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
        sbOut.value = 0x80;
        return super.ctypeCodeRange(ctype);
    }

    /** Returns {@code true} unless {@code c} has the continuation-byte pattern {@code 10xxxxxx}. */
    private static boolean utf8IsLead(int c) {
        return (c & 0xc0) != 0x80;
    }

    /**
     * Moves {@code s} backwards to the start of the character it points into.
     *
     * @param bytes buffer being scanned
     * @param p     lowest index the result may take
     * @param s     index to adjust
     * @param end   unused here
     * @return {@code s} itself if {@code s <= p}; otherwise the nearest index at
     *         or before {@code s} holding a non-continuation byte, or {@code p}
     *         if none is found above it
     */
    @Override
    public int leftAdjustCharHead(byte[]bytes, int p, int s, int end) {
        if (s <= p) return s;

        int head = s;
        // The byte is inspected before the bound, matching the order of reads callers rely on.
        for (; !utf8IsLead(bytes[head] & 0xff) && head > p; head--) {
            // step left over continuation bytes
        }
        return head;
    }

    /** @return always {@code true} */
    @Override
    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
        return true;
    }
}