// graal/21.0.0.2 : substratevm/com.oracle.objectfile/com/oracle/objectfile/io/Utf8.java

/*
 * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.oracle.objectfile.io;

import java.io.CharConversionException;
import java.nio.ByteBuffer;

/**
 * Implements UTF-8 encoding and decoding of strings with support for zero-bytes as string
 * terminators.
 */
public final class Utf8 {

    /** Not instantiable: this class only hosts static helpers. */
    private Utf8() {
    }

    /**
     * Computes the encoded size of a whole string.
     *
     * @param string the string to measure
     * @return the length in bytes of the UTF8 representation of the string
     */
    public static int utf8Length(String string) {
        return utf8Length(string, 0, string.length());
    }

    /**
     * Computes the encoded size of a region of a string. Each {@code char} is measured on its own:
     * values in {@code [0x0001, 0x007F]} take one byte, values above {@code 0x07FF} take three
     * bytes, and everything else (including the character {@code '\0'}) takes two bytes.
     *
     * @param s the string containing the region
     * @param beginIndex first index that is part of the region, inclusive
     * @param endIndex index at the end of the region, exclusive
     * @return the length in bytes of the UTF8 representation of the string region
     * @throws StringIndexOutOfBoundsException if the region does not lie within {@code s}
     */
    public static int utf8Length(String s, int beginIndex, int endIndex) {
        checkRegion(s, beginIndex, endIndex);
        int length = 0;
        for (int i = beginIndex; i < endIndex; i++) {
            final int c = s.charAt(i);
            if (c >= 0x0001 && c <= 0x007F) {
                length += 1;
            } else if (c > 0x07FF) {
                length += 3;
            } else {
                /* '\0' deliberately lands here so that it never produces a raw zero byte. */
                length += 2;
            }
        }
        return length;
    }

    /**
     * Writes an UTF8-encoded string region to a given byte buffer. The number of bytes written for
     * the region equals {@link #utf8Length(String, int, int)} for the same arguments, plus one if
     * {@code zeroTerminate} is set. The character {@code '\0'} is written as the two bytes
     * {@code 0xC0 0x80}, so the only zero byte that can appear in the output is the terminator.
     *
     * @param dest the byte buffer to write to
     * @param source the String to be written
     * @param beginIndex first index in {@code source} that is part of the region, inclusive
     * @param endIndex index in {@code source} at the end of the region, exclusive
     * @param zeroTerminate whether to write a final zero byte
     * @throws StringIndexOutOfBoundsException if the region does not lie within {@code source}
     * @throws java.nio.BufferOverflowException if {@code dest} runs out of space; bytes written
     *             before that point remain in the buffer
     */
    public static void substringToUtf8(ByteBuffer dest, String source, int beginIndex, int endIndex, boolean zeroTerminate) {
        checkRegion(source, beginIndex, endIndex);
        for (int i = beginIndex; i < endIndex; i++) {
            final char c = source.charAt(i);
            if (c >= 0x0001 && c <= 0x007F) {
                /* 0xxxxxxx */
                dest.put((byte) c);
            } else if (c > 0x07FF) {
                /* 1110 xxxx 10xx xxxx 10xx xxxx */
                dest.put((byte) (0xE0 | (c >> 12)));
                dest.put((byte) (0x80 | ((c >> 6) & 0x3F)));
                dest.put((byte) (0x80 | (c & 0x3F)));
            } else {
                /* 110x xxxx 10xx xxxx (also used for '\0') */
                dest.put((byte) (0xC0 | (c >> 6)));
                dest.put((byte) (0x80 | (c & 0x3F)));
            }
        }
        if (zeroTerminate) {
            dest.put((byte) 0);
        }
    }

    /**
     * Converts a byte buffer of UTF-8 data to a String. The entire buffer until the
     * {@link ByteBuffer#limit() buffer's limit} is converted unless {@code zeroTerminated} is
     * {@code true}, in which case conversion stops at the first zero byte (which is consumed).
     * Only one-, two- and three-byte sequences are accepted.
     *
     * @param zeroTerminated if true, then a 0 byte marks the end of the string, and character '\0'
     *            in the input must be encoded as two bytes as opposed to one
     * @param source the byte buffer to read from
     * @return the decoded string
     * @throws CharConversionException if the input contains an invalid leading byte, an invalid
     *             continuation byte, or a multi-byte sequence that is cut off by the buffer's limit
     */
    public static String utf8ToString(boolean zeroTerminated, ByteBuffer source) throws CharConversionException {
        final StringBuilder sb = new StringBuilder();
        while (source.hasRemaining()) {
            final int c0 = source.get() & 0xff;
            if (zeroTerminated && c0 == 0) {
                break;
            }
            switch (c0 >> 4) {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7: {
                    /* 0xxxxxxx */
                    sb.append((char) c0);
                    break;
                }
                case 12:
                case 13: {
                    /* 110x xxxx 10xx xxxx */
                    final int low = readContinuationBits(source);
                    sb.append((char) (((c0 & 0x1F) << 6) | low));
                    break;
                }
                case 14: {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    final int mid = readContinuationBits(source);
                    final int low = readContinuationBits(source);
                    sb.append((char) (((c0 & 0x0F) << 12) | (mid << 6) | low));
                    break;
                }
                default: {
                    /* 10xx xxxx, 1111 xxxx */
                    throw new CharConversionException("invalid UTF-8 leading byte 0x" + Integer.toHexString(c0));
                }
            }
        }
        return sb.toString();
    }

    /**
     * Reads one continuation byte ({@code 10xx xxxx}) and returns its six payload bits. A missing
     * byte is reported as a conversion error rather than letting the buffer raise an unchecked
     * underflow exception, so that callers see every kind of malformed input the same way.
     */
    private static int readContinuationBits(ByteBuffer source) throws CharConversionException {
        if (!source.hasRemaining()) {
            throw new CharConversionException("truncated UTF-8 multi-byte sequence");
        }
        final int b = source.get();
        if ((b & 0xC0) != 0x80) {
            throw new CharConversionException("invalid UTF-8 continuation byte 0x" + Integer.toHexString(b & 0xff));
        }
        return b & 0x3F;
    }

    /** Validates that {@code [beginIndex, endIndex)} is a well-formed region of {@code s}. */
    private static void checkRegion(String s, int beginIndex, int endIndex) {
        if (beginIndex < 0 || endIndex > s.length() || beginIndex > endIndex) {
            throw new StringIndexOutOfBoundsException("begin " + beginIndex + ", end " + endIndex + ", length " + s.length());
        }
    }

}
// Source: graal/21.0.0.2/substratevm/com.oracle.objectfile/com/oracle/objectfile/io/Utf8.java