java/6 : sun/io/CharToByteCp949C.java

CharToByteCp949C
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.io;

import sun.nio.cs.ext.IBM949C;

Author: Malcolm Ayres/**
* @author Malcolm Ayres
*/

/*
Cp949C is a hand-modified version of Cp949
maps Unicode U-005C <-> 0x5C (local code page)
 */

public class CharToByteCp949C extends CharToByteConverter
{
    private static final char SBase = '\uAC00';
    private static final char LBase = '\u1100';
    private static final char VBase = '\u1161';
    private static final char TBase = '\u11A7';
    private static final int  VCount = 21;
    private static final int  TCount = 28;
    private static final byte G0 = 0;
    private static final byte G1 = 1;
    private static final byte G2 = 2;
    private static final byte G3 = 3;
    private byte   charState = G0;
    private char   l, v, t;

    private byte[] outputByte;

    private char highHalfZoneCode;
    private int  mask1;
    private int  mask2;
    private int  shift;
    private short[] index1;
    private String index2;
    private String index2a;

    private final static IBM949C nioCoder = new IBM949C();

    public CharToByteCp949C() {
       super();
       index1 = nioCoder.getEncoderIndex1();
       index2 = nioCoder.getEncoderIndex2();
       index2a = nioCoder.getEncoderIndex2a();
       highHalfZoneCode = 0;
       outputByte = new byte[2];
       mask1 = 0xFFF8;
       mask2 = 0x0007;
       shift = 3;
    }

    flush out any residual data and reset the buffer state
/**
      * flush out any residual data and reset the buffer state
      */
    public int flush(byte[] output, int outStart, int outEnd)
        throws MalformedInputException,
               ConversionBufferFullException
    {
       int bytesOut;

       byteOff = outStart;

       if (highHalfZoneCode != 0) {
           reset();
           badInputLength = 0;
           throw new MalformedInputException();
       }

       if (charState != G0) {
           try {
              unicodeToBuffer(composeHangul() ,output, outEnd);
           }
           catch(UnknownCharacterException e) {
              reset();
              badInputLength = 0;
              throw new MalformedInputException();
           }
           charState = G0;
       }

       bytesOut = byteOff - outStart;

       reset();
       return bytesOut;
    }

    Resets converter to its initial state.
/**
     * Resets converter to its initial state.
     */
    public void reset() {
       highHalfZoneCode = 0;
       charState = G0;
       charOff = byteOff = 0;
    }

    Returns true if the given character can be converted to the
target character encoding.
/**
     * Returns true if the given character can be converted to the
     * target character encoding.
     */
    public boolean canConvert(char ch) {
       int  index;
       int  theBytes;

       index = index1[((ch & mask1) >> shift)] + (ch & mask2);
       if (index < 15000)
         theBytes = (int)(index2.charAt(index));
       else
         theBytes = (int)(index2a.charAt(index-15000));

       if (theBytes != 0)
          return (true);

       // only return true if input char was unicode null - all others are
       //    undefined
       return( ch == '\u0000');
    }

    Character conversion
/**
     * Character conversion
     */

    public int convert(char[] input, int inOff, int inEnd,
                       byte[] output, int outOff, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
       char    inputChar;
       int     inputSize;

       charOff = inOff;
       byteOff = outOff;

       while (charOff < inEnd) {

          if (highHalfZoneCode == 0) {
             inputChar = input[charOff];
             inputSize = 1;
          } else {
             inputChar = highHalfZoneCode;
             inputSize = 0;
             highHalfZoneCode = 0;
          }

          switch (charState) {
          case G0:

             l = LBase;
             v = VBase;
             t = TBase;

             if ( isLeadingC(inputChar) ) {     // Leading Consonant
                l = inputChar;
                charState = G1;
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = inputChar;
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             break;

          case G1:
             if ( isLeadingC(inputChar) ) {     // Leading Consonant
                l = composeLL(l, inputChar);
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = inputChar;
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;
             break;

          case G2:
             if ( isLeadingC(inputChar) ) {     // Leading Consonant

                unicodeToBuffer(composeHangul(), output, outEnd);

                l = inputChar;
                v = VBase;
                t = TBase;
                charState = G1;
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = composeVV(l, inputChar);
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;

             break;

          case G3:
             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = composeTT(t, inputChar);
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;

             break;
          }

          if (charState != G0)
             charOff++;
          else {

             // Is this a high surrogate?
             if(inputChar >= '\ud800' && inputChar <= '\udbff') {
                // Is this the last character of the input?
                if (charOff + inputSize >= inEnd) {
                   highHalfZoneCode = inputChar;
                   charOff += inputSize;
                   break;
                }

                // Is there a low surrogate following?
                inputChar = input[charOff + inputSize];
                if (inputChar >= '\udc00' && inputChar <= '\udfff') {
                   // We have a valid surrogate pair.  Too bad we don't do
                   // surrogates.  Is substitution enabled?
                   if (subMode) {
                      if (subBytes.length == 1) {
                         outputByte[0] = 0x00;
                         outputByte[1] = subBytes[0];
                      } else {
                         outputByte[0] = subBytes[0];
                         outputByte[1] = subBytes[1];
                      }

                      bytesToBuffer(outputByte, output, outEnd);
                      inputSize++;
                   } else {
                      badInputLength = 2;
                      throw new UnknownCharacterException();
                   }
                } else {
                   // We have a malformed surrogate pair
                   badInputLength = 1;
                   throw new MalformedInputException();
                }
             }

               // Is this an unaccompanied low surrogate?
             else
                if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                   badInputLength = 1;
                   throw new MalformedInputException();
                } else {
                   unicodeToBuffer(inputChar, output, outEnd);
                }

             charOff += inputSize;

          }

       }

       return byteOff - outOff;

    }

    private char composeHangul() {
       int lIndex, vIndex, tIndex;

       lIndex = l - LBase;
       vIndex = v - VBase;
       tIndex = t - TBase;

       return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
    }

    private char composeLL(char l1, char l2) {
       return l2;
    }

    private char composeVV(char v1, char v2) {
       return v2;
    }

    private char composeTT(char t1, char t2) {
       return t2;
    }

    private boolean isLeadingC(char c) {
       return (c >= LBase && c <= '\u1159');
    }

    private boolean isVowel(char c) {
       return (c >= VBase && c <= '\u11a2');
    }

    private boolean isTrailingC(char c) {
       return (c >= TBase && c <= '\u11f9');
    }

    returns the maximum number of bytes needed to convert a char
/**
     * returns the maximum number of bytes needed to convert a char
     */
    public int getMaxBytesPerChar() {
       return 2;
    }


    Return the character set ID
/**
     * Return the character set ID
     */
    public String getCharacterEncoding() {
       return "Cp949C";
    }

    private function to add the bytes to the output buffer
/**
     * private function to add the bytes to the output buffer
     */
    private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
        throws ConversionBufferFullException,
               UnknownCharacterException {

       int spaceNeeded;

       // ensure sufficient space for the bytes(s)

       if (theBytes[0] == 0x00)
          spaceNeeded = 1;
       else
          spaceNeeded = 2;

       if (byteOff + spaceNeeded > outEnd)
          throw new ConversionBufferFullException();

       // move the data into the buffer

       if (spaceNeeded == 1)
          output[byteOff++] = theBytes[1];
       else {
          output[byteOff++] = theBytes[0];
          output[byteOff++] = theBytes[1];
       }

    }

    private function to add a unicode character to the output buffer
/**
     * private function to add a unicode character to the output buffer
     */
    private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
        throws ConversionBufferFullException,
               UnknownCharacterException {

       int index;
       int theBytes;

       // first we convert the unicode to its byte representation

       index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
       if (index < 15000)
         theBytes = (int)(index2.charAt(index));
       else
         theBytes = (int)(index2a.charAt(index-15000));
       outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
       outputByte[1] = (byte)(theBytes & 0x000000ff);

       // if the unicode was not mappable - look for the substitution bytes

       if (outputByte[0] == 0x00 && outputByte[1] == 0x00
                          && unicode != '\u0000') {
          if (subMode) {
             if (subBytes.length == 1) {
                outputByte[0] = 0x00;
                outputByte[1] = subBytes[0];
             } else {
                outputByte[0] = subBytes[0];
                outputByte[1] = subBytes[1];
             }
          } else {
             badInputLength = 1;
             throw new UnknownCharacterException();
          }
       }

       // now put the bytes in the buffer

       bytesToBuffer(outputByte, output, outEnd);

    }
}
/

java/ 6/ sun/io/CharToByteCp949C.java