package org.jcodings.specific;
import org.jcodings.IntHolder;
import org.jcodings.MultiByteEncoding;
import org.jcodings.ascii.AsciiTables;
/**
 * Encoding implementation for the GB18030 charset.
 *
 * <p>As encoded by the tables in this class, a character is one of:
 * <ul>
 *   <li>1 byte: {@code 0x00-0x7F};</li>
 *   <li>2 bytes: lead {@code 0x81-0xFE}, then {@code 0x40-0x7E} or {@code 0x80-0xFE};</li>
 *   <li>4 bytes: {@code 0x81-0xFE}, {@code 0x30-0x39}, {@code 0x81-0xFE}, {@code 0x30-0x39}.</li>
 * </ul>
 *
 * <p>{@code A}, {@code F}, {@code Trans}, {@code TransZero}, {@code missing},
 * {@code CHAR_INVALID} and the {@code mbn...}/{@code mb4...} helpers are not defined
 * in this file; they are presumably inherited from {@link MultiByteEncoding}.
 * This class only relies on {@code A} being negative (see the {@code s < 0} checks in
 * {@link #length}). NOTE(review): {@code F} is assumed to be a negative "reject" marker
 * and {@code Trans}/{@code TransZero} are assumed to expose {@code GB18030Trans} and its
 * row 0 -- confirm against the superclass.
 */
public final class GB18030Encoding extends MultiByteEncoding {
// Charset name, passed to the superclass and returned by getCharsetName().
private static final String GB18030 = "GB18030";
/**
 * Creates the encoding. The arguments 1 and 4 are presumably the minimum and maximum
 * character length in bytes (consistent with the values {@link #length} can return) --
 * TODO confirm against the MultiByteEncoding constructor. The fourth argument is
 * deliberately {@code null}; its meaning is defined by the superclass.
 */
protected GB18030Encoding() {
super(GB18030, 1, 4, null, GB18030Trans, AsciiTables.AsciiCtypeTable);
}
/**
 * Determines the byte length of the character that starts at {@code bytes[p]}.
 *
 * @param bytes buffer holding the encoded text
 * @param p index of the first byte of the character; read unconditionally, so the
 *          caller must ensure {@code p < end}
 * @param end index one past the last readable byte
 * @return 1, 2 or 4 when the transition tables accept the sequence, {@code CHAR_INVALID}
 *         when they reject it, or the result of {@code missing(n)} when {@code end} is
 *         reached before the sequence is complete
 */
@Override
public int length(byte[]bytes, int p, int end) {
int s = TransZero[bytes[p] & 0xff];
// Negative means the table reached a terminal entry: A => complete 1-byte character.
if (s < 0) return s == A ? 1 : CHAR_INVALID;
return lengthForTwoUptoFour(bytes, p, end, s);
}
/**
 * Continues {@link #length} at the second byte.
 *
 * @param p index of the byte already consumed (the lead byte)
 * @param s non-negative table state reached after the lead byte
 */
private int lengthForTwoUptoFour(byte[]bytes, int p, int end, int s) {
// Input ends right after the lead byte: at least one more byte is needed.
if (++p == end) return missing(1);
s = Trans[s][bytes[p] & 0xff];
if (s < 0) return s == A ? 2 : CHAR_INVALID;
return lengthForThreeUptoFour(bytes, p, end, s);
}
/**
 * Continues {@link #length} at the third and fourth bytes of a 4-byte sequence.
 *
 * @param p index of the byte already consumed (the second byte)
 * @param s non-negative table state reached after the second byte
 */
private int lengthForThreeUptoFour(byte[]bytes, int p, int end, int s) {
// Two bytes seen and the second one demands a 4-byte form: two bytes are missing.
if (++p == end) return missing(2);
s = Trans[s][bytes[p] & 0xff];
// With GB18030Trans row 2 this lookup never yields A (only 3 or F), so 3 is not
// expected in practice; the check mirrors the shape of the previous steps.
if (s < 0) return s == A ? 3 : CHAR_INVALID;
if (++p == end) return missing(1);
s = Trans[s][bytes[p] & 0xff];
// Fourth byte is the last possible one: anything but A is invalid.
return s == A ? 4 : CHAR_INVALID;
}
/** Decodes the character at {@code p}; delegates to {@code mbnMbcToCode} (defined elsewhere). */
@Override
public int mbcToCode(byte[]bytes, int p, int end) {
return mbnMbcToCode(bytes, p, end);
}
/** Returns the encoded length for {@code code}; delegates to {@code mb4CodeToMbcLength} (defined elsewhere). */
@Override
public int codeToMbcLength(int code) {
return mb4CodeToMbcLength(code);
}
/** Encodes {@code code} into {@code bytes} at {@code p}; delegates to {@code mb4CodeToMbc} (defined elsewhere). */
@Override
public int codeToMbc(int code, byte[]bytes, int p) {
return mb4CodeToMbc(code, bytes, p);
}
/** Case-folds the character at {@code pp}; delegates to {@code mbnMbcCaseFold} (defined elsewhere). */
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
return mbnMbcCaseFold(flag, bytes, pp, end, lower);
}
/** Tests {@code code} against {@code ctype}; delegates to {@code mb4IsCodeCType} (defined elsewhere). */
@Override
public boolean isCodeCType(int code, int ctype) {
return mb4IsCodeCType(code, ctype);
}
/**
 * Always returns {@code null}; neither argument is inspected and {@code sbOut} is
 * not written.
 */
@Override
public int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
return null;
}
/** Returns the charset name, {@code "GB18030"}. */
@Override
public String getCharsetName() {
return GB18030;
}
/**
 * States of the backward scanner in {@link #leftAdjustCharHead}.
 *
 * <p>NOTE(review): the names appear to describe the pattern of byte classes
 * (C2 / C4 / CM, see {@code GB18030_MAP}) collected so far while walking right-to-left,
 * with Odd/Even tracking the parity of a repeated run -- this reading is inferred from
 * the names and transitions, confirm before relying on it.
 */
private enum State {
START,
One_C2,
One_C4,
One_CM,
Odd_CM_One_CX,
Even_CM_One_CX,
One_CMC4,
Odd_CMC4,
One_C4_Odd_CMC4,
Even_CMC4,
One_C4_Even_CMC4,
Odd_CM_Odd_CMC4,
Even_CM_Odd_CMC4,
Odd_CM_Even_CMC4,
Even_CM_Even_CMC4,
Odd_C4CM,
One_CM_Odd_C4CM,
Even_C4CM,
One_CM_Even_C4CM,
Even_CM_Odd_C4CM,
Odd_CM_Odd_C4CM,
Even_CM_Even_C4CM,
Odd_CM_Even_C4CM
};
/**
 * Walks backward from {@code s} toward {@code start}, classifying each byte with
 * {@code GB18030_MAP}, until the state machine can decide, and returns one of
 * {@code s}, {@code s - 1}, {@code s - 2} or {@code s - 3}. Presumably the result is
 * the index of the first byte of the character that contains position {@code s}
 * (inferred from the method name -- confirm against the superclass contract).
 *
 * @param bytes buffer holding the encoded text
 * @param start lowest index the scan may read
 * @param s position to adjust; also the first byte examined
 * @param end not used by this implementation
 * @return the adjusted position, never greater than {@code s}
 */
@Override
public int leftAdjustCharHead(byte[]bytes, int start, int s, int end) {
State state = State.START;
// Each iteration consumes one byte to the left; a 'return' ends the scan as soon as
// the byte class seen in the current state settles the answer, otherwise the state
// advances and the scan continues.
for (int p = s; p >= start; p--) {
int pByte = bytes[p] & 0xff;
switch (state) {
case START:
// Byte at s itself: a C1 byte settles the answer immediately.
switch (GB18030_MAP[pByte]) {
case C1:
return s;
case C2:
state = State.One_C2;
break;
case C4:
state = State.One_C4;
break;
case CM:
state = State.One_CM;
break;
}
break;
case One_C2:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = State.Odd_CM_One_CX;
break;
}
break;
case One_C4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = State.One_CMC4;
break;
}
break;
case One_CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return s;
case C4:
state = State.Odd_C4CM;
break;
case CM:
state = State.Odd_CM_One_CX;
break;
}
break;
// Odd_CM_One_CX / Even_CM_One_CX alternate on every further CM byte; the answer
// flips between s - 1 and s accordingly.
case Odd_CM_One_CX:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = State.Even_CM_One_CX;
break;
}
break;
case Even_CM_One_CX:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = State.Odd_CM_One_CX;
break;
}
break;
// *_CMC4 states: reached when the byte at s is C4 and the byte before it is CM.
// Answers here are s - 1 or s - 3.
case One_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return (s - 1);
case C4:
state = State.One_C4_Odd_CMC4;
break;
case CM:
state = State.Even_CM_One_CX;
break;
}
break;
case Odd_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return (s - 1);
case C4:
state = State.One_C4_Odd_CMC4;
break;
case CM:
state = State.Odd_CM_Odd_CMC4;
break;
}
break;
case One_C4_Odd_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = State.Even_CMC4;
break;
}
break;
case Even_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return (s - 3);
case C4:
state = State.One_C4_Even_CMC4;
break;
case CM:
state = State.Odd_CM_Even_CMC4;
break;
}
break;
case One_C4_Even_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = State.Odd_CMC4;
break;
}
break;
case Odd_CM_Odd_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = State.Even_CM_Odd_CMC4;
break;
}
break;
case Even_CM_Odd_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = State.Odd_CM_Odd_CMC4;
break;
}
break;
case Odd_CM_Even_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = State.Even_CM_Even_CMC4;
break;
}
break;
case Even_CM_Even_CMC4:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = State.Odd_CM_Even_CMC4;
break;
}
break;
// *_C4CM states: reached when the byte at s is CM and the byte before it is C4.
// Answers here are s or s - 2.
case Odd_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = State.One_CM_Odd_C4CM;
break;
}
break;
case One_CM_Odd_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return (s - 2);
case C4:
state = State.Even_C4CM;
break;
case CM:
state = State.Even_CM_Odd_C4CM;
break;
}
break;
case Even_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 2);
case CM:
state = State.One_CM_Even_C4CM;
break;
}
break;
case One_CM_Even_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
return (s - 0);
case C4:
state = State.Odd_C4CM;
break;
case CM:
state = State.Even_CM_Even_C4CM;
break;
}
break;
case Even_CM_Odd_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 0);
case CM:
state = State.Odd_CM_Odd_C4CM;
break;
}
break;
case Odd_CM_Odd_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 2);
case CM:
state = State.Even_CM_Odd_C4CM;
break;
}
break;
case Even_CM_Even_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 2);
case CM:
state = State.Odd_CM_Even_C4CM;
break;
}
break;
case Odd_CM_Even_C4CM:
switch (GB18030_MAP[pByte]) {
case C1:
case C2:
case C4:
return (s - 0);
case CM:
state = State.Even_CM_Even_C4CM;
break;
}
break;
}
}
// The scan ran past 'start' without deciding. Each state resolves to the same value
// it returns when it meets a C1 byte, i.e. the buffer boundary is treated like a
// byte that cannot belong to a multi-byte character.
switch (state) {
case START: return (s - 0);
case One_C2: return (s - 0);
case One_C4: return (s - 0);
case One_CM: return (s - 0);
case Odd_CM_One_CX: return (s - 1);
case Even_CM_One_CX: return (s - 0);
case One_CMC4: return (s - 1);
case Odd_CMC4: return (s - 1);
case One_C4_Odd_CMC4: return (s - 1);
case Even_CMC4: return (s - 3);
case One_C4_Even_CMC4: return (s - 3);
case Odd_CM_Odd_CMC4: return (s - 3);
case Even_CM_Odd_CMC4: return (s - 1);
case Odd_CM_Even_CMC4: return (s - 1);
case Even_CM_Even_CMC4: return (s - 3);
case Odd_C4CM: return (s - 0);
case One_CM_Odd_C4CM: return (s - 2);
case Even_C4CM: return (s - 2);
case One_CM_Even_C4CM: return (s - 0);
case Even_CM_Odd_C4CM: return (s - 0);
case Odd_CM_Odd_C4CM: return (s - 2);
case Even_CM_Even_C4CM: return (s - 2);
case Odd_CM_Even_C4CM: return (s - 0);
}
// Every enum constant is handled above; this return only satisfies the compiler.
return s;
}
/**
 * Returns {@code true} only when {@code bytes[p]} is a C1-class byte, i.e. one that
 * the tables in this class never accept as part of a multi-byte character.
 * {@code end} is not used.
 */
@Override
public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
return GB18030_MAP[bytes[p] & 0xff] == C1;
}
// Byte classes stored in GB18030_MAP. Ranges below are read off that table; the
// roles are cross-checked against GB18030Trans.
// C1: 0x00-0x2F, 0x3A-0x3F, 0x7F, 0xFF -- never accepted beyond the first byte.
private static final int C1 = 0;
// C2: 0x40-0x7E and 0x80 -- accepted as the second byte of a 2-byte character,
// never as a lead byte.
private static final int C2 = 1;
// C4: 0x30-0x39 -- the second/fourth byte of a 4-byte character.
private static final int C4 = 2;
// CM: 0x81-0xFE -- lead byte, and also accepted as second (2-byte form) or
// third (4-byte form) byte.
private static final int CM = 3;
// Byte value (0..255) -> byte class; 16 entries per row, row k covers 0xk0-0xkF.
private static final int GB18030_MAP[] = {
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
};
// Transition table handed to the superclass constructor: GB18030Trans[state][byte].
// A non-negative entry is the next row to consult; A and F are terminal markers that
// are not defined in this file (presumably "accept" and "fail" -- length() treats A as
// success and any other negative value as invalid).
private static final int GB18030Trans[][] = new int[][]{
// Row 0 -- first byte: 0x00-0x7F accept (1-byte char), 0x81-0xFE go to row 1,
// 0x80 and 0xFF are F.
{
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
},
// Row 1 -- second byte: 0x30-0x39 go to row 2 (4-byte form); 0x40-0x7E and
// 0x80-0xFE accept (2-byte char); everything else is F.
{
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F, F, F, F, F, F,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F
},
// Row 2 -- third byte of a 4-byte character: 0x81-0xFE go to row 3, everything
// else is F. This row contains no A entry.
{
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, F
},
// Row 3 -- fourth byte of a 4-byte character: 0x30-0x39 accept, everything else is F.
{
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
}
};
// Shared singleton. Static initializers run in textual order and the constructor
// reads GB18030Trans, so this declaration has to stay below the table.
public static final GB18030Encoding INSTANCE = new GB18030Encoding();
}