com.fasterxml/aalto-xml/1.2.2 : test/TestPNamePerf.java

TestPNamePerf
http://github.com/FasterXML/aalto-xml/: Ultra-high performance non-blocking XML processor (Stax/Stax2, SAX/SAX2) (FasterXML)
The Apache Software License, Version 2.0
Tatu Saloranta
package test;

import java.io.*;
import javax.xml.stream.*;


import com.fasterxml.aalto.in.*;
import com.fasterxml.aalto.util.*;

public final class TestPNamePerf
{
    // Code of 'A' (65): the scan loops skip bytes below this value and the
    // parse methods reject them as a name start
    final static int INT_A = 'A';

    // Number of passes made over the input within one timed round
    final int mRepCount;

    // Not referenced by any method visible in this class
    int mTmpChar = 0;

    // Bytes being scanned; the array handed to the constructor
    final byte[] mInputBuffer;

    // Symbol table used for name lookups and additions; obtained from
    // ReaderConfig.getBBSymbols()
    final ByteBasedPNameTable mSymbols;

    // Character type tables obtained from ReaderConfig.getCharTypes();
    // addPName reads its NAME_CHARS table
    final XmlCharTypes mCharTypes;

    // Index of the next byte to read from mInputBuffer
    int mInputPtr;

    // Number of bytes available in mInputBuffer (set to its length)
    int mInputLen;

    // Reusable scratch array for the quads (ints packing up to 4 name bytes)
    // of the name being parsed; replaced by a larger array when it fills up
    protected int[] mQuadBuffer = new int[64];

    // Reusable scratch array for decoded name characters; replaced by a
    // larger array when it fills up
    protected char[] mNameBuffer = new char[100];

    /**
     * Prepares a benchmark run over the given bytes.
     *
     * @param data input bytes to scan; the array is used as-is, not copied
     * @param repCount number of passes over {@code data} per timed round
     */
    public TestPNamePerf(byte[] data, int repCount)
    {
        this.mRepCount = repCount;
        this.mInputLen = data.length;
        this.mInputBuffer = data;

        // Symbol table and character type tables both come from a reader
        // configuration set up for UTF-8
        final ReaderConfig config = new ReaderConfig();
        config.setActualEncoding(CharsetNames.CS_UTF8);
        this.mSymbols = config.getBBSymbols();
        this.mCharTypes = config.getCharTypes();
    }

    /**
     * Runs the timing loop. Each round times one scan variant over the
     * input, prints the elapsed wall-clock time together with the quad total
     * the scan returned, then pauses, requests a GC and pauses again before
     * the next round.
     * <p>
     * The variant is currently pinned to "[New/2]": the switch selector is
     * the constant 0 and rotation by {@code round % TYPES} is disabled.
     * <p>
     * The loop has no normal exit. It ends when a scan throws, or when the
     * thread is interrupted during one of the pauses; in the latter case the
     * interrupt status is restored and the method returns.
     *
     * @throws IOException declared by the scan methods
     * @throws XMLStreamException declared by the scan methods
     */
    public void test()
        throws IOException, XMLStreamException
    {
        final int TYPES = 3;

        for (int round = 0; true; ++round) {
            String msg = "[null]";
            int total = 0;

            long now = System.currentTimeMillis();
            // To rotate between the variants, switch on (round % TYPES) instead
            switch (0) {
            case 1:
                msg = "[Regular]";
                total = testRegularA();
                break;
            case 2:
                msg = "[New]";
                total = testNewA();
                break;
            case 0:
                msg = "[New/2]";
                total = testNew2A();
                break;
            default:
                throw new Error("Unexpected round, #"+round);
            }

            now = System.currentTimeMillis() - now;
            System.out.println(msg+" -> "+now+" msecs (total "+total+")");

            // Blank line between groups of TYPES rounds
            if ((round % TYPES) == 0) {
                System.out.println();
            }

            // Let things settle and give the GC a chance between rounds
            try {
                Thread.sleep(200L);
                System.gc();
                Thread.sleep(200L);
            } catch (InterruptedException e) {
                // Someone asked this thread to stop: keep the flag set for
                // the caller and leave the otherwise endless loop
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Runs {@link #testRegular()} {@code mRepCount} times, rewinding the
     * input before each pass.
     *
     * @return sum of the values returned by the individual passes
     */
    private int testRegularA()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind to the start of the input
            sum += testRegular();
        }
        return sum;
    }
    /**
     * Runs {@link #testNew()} {@code mRepCount} times, rewinding the input
     * before each pass.
     *
     * @return sum of the values returned by the individual passes
     */
    private int testNewA()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind to the start of the input
            sum += testNew();
        }
        return sum;
    }
    /**
     * Runs {@link #testNew2()} {@code mRepCount} times, rewinding the input
     * before each pass.
     *
     * @return sum of the values returned by the individual passes
     */
    private int testNew2A()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind to the start of the input
            sum += testNew2();
        }
        return sum;
    }

    /**
     * Scans the input from the current position to its end, parsing a name
     * with {@link #parsePName(byte)} at every byte that is {@code 'A'} or
     * above.
     *
     * @return sum of {@code sizeInQuads()} over all parsed names, with the
     *   last parsed name counted once more (kept from the original code);
     *   0 if the input contains no name at all
     */
    private int testRegular()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            // Skip every byte that cannot start a name, white space included
            if ((b & 0xFF) >= INT_A) {
                name = parsePName(b);
                count += name.sizeInQuads();
            }
        }
        // Input without a single name start byte leaves 'name' unset
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Scans the input from the current position to its end, parsing a name
     * with {@link #parsePNameNew(byte)} at every byte that is {@code 'A'} or
     * above.
     *
     * @return sum of {@code sizeInQuads()} over all parsed names, with the
     *   last parsed name counted once more (kept from the original code);
     *   0 if the input contains no name at all
     */
    private int testNew()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            // Skip every byte that cannot start a name, white space included
            if ((b & 0xFF) >= INT_A) {
                name = parsePNameNew(b);
                count += name.sizeInQuads();
            }
        }
        // Input without a single name start byte leaves 'name' unset
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Scans the input from the current position to its end, parsing a name
     * with {@link #parsePNameNew2(byte)} at every byte that is {@code 'A'}
     * or above.
     *
     * @return sum of {@code sizeInQuads()} over all parsed names, with the
     *   last parsed name counted once more (kept from the original code);
     *   0 if the input contains no name at all
     */
    private int testNew2()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            // Skip every byte that cannot start a name, white space included
            if ((b & 0xFF) >= INT_A) {
                name = parsePNameNew2(b);
                count += name.sizeInQuads();
            }
        }
        // Input without a single name start byte leaves 'name' unset
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Baseline name parser: every read after the first byte of a quad is
     * bounds-checked.
     * <p>
     * Name bytes are packed four to an int ("quad"), earlier bytes in the
     * more significant positions. A byte ends the name when it is below 65
     * and is not one of '-' (45), '.' (46), '0'-'9' (48-57) or ':' (58);
     * bytes of 65 and above are all accepted at this stage. The byte that
     * ended the name is pushed back by the {@code findPName} call.
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the name found in, or added to, the symbol table
     */
    protected ByteBasedPName parsePName(byte b)
        throws XMLStreamException
    {
        int quad = b & 0xFF;

        // 'A' is the lowest start byte accepted (':' would only be allowed in non-ns mode)
        if (quad < INT_A) {
            reportError("; expected a name start character");
        }

        int[] quads = mQuadBuffer;
        int firstQuad = 0;
        int quadCount = 0; // number of complete quads collected so far

        for (;;) {
            // 2nd byte of the current quad
            if (mInputPtr >= mInputLen) {
                loadMoreGuaranteed();
            }
            int next = mInputBuffer[mInputPtr++] & 0xFF;
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 1 byte
                return findPName(quad, 1, firstQuad, quadCount, quads);
            }

            // 3rd byte
            quad = (quad << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 2 bytes
                return findPName(quad, 2, firstQuad, quadCount, quads);
            }

            // 4th byte
            quad = (quad << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 3 bytes
                return findPName(quad, 3, firstQuad, quadCount, quads);
            }

            // 1st byte of the following quad
            quad = (quad << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad is full (4 bytes)
                return findPName(quad, 4, firstQuad, quadCount, quads);
            }

            // Quad is complete and the name goes on: store it
            switch (quadCount) {
            case 0: // first quad is just remembered, no array needed yet
                firstQuad = quad;
                break;
            case 1: // second quad: start filling the array
                quads[0] = firstQuad;
                quads[1] = quad;
                break;
            default: // third or later: make sure there is room first
                if (quadCount >= quads.length) {
                    mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
                }
                quads[quadCount] = quad;
            }
            ++quadCount;
            quad = next;
        }
    }

    /**
     * Name parser variant that checks once that at least 8 more bytes are
     * available and then reads the first two quads without per-byte bounds
     * checks. With fewer bytes left it defers to {@link #parsePName(byte)};
     * names longer than two quads continue in
     * {@link #parsePNameNewLong(int, int[])}.
     * <p>
     * A byte ends the name when it is below 65 and is not one of '-' (45),
     * '.' (46), '0'-'9' (48-57) or ':' (58).
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the name found in, or added to, the symbol table
     */
    protected ByteBasedPName parsePNameNew(byte b)
        throws XMLStreamException
    {
        // Got 1 byte; the unrolled code needs 7 more plus one trailing byte
        if ((mInputLen - mInputPtr) < 8) {
            return parsePName(b);
        }

        int quad1 = b & 0xFF;

        // 'A' is the lowest start byte accepted (':' would only be allowed in non-ns mode)
        if (quad1 < INT_A) {
            reportError("; expected a name start character");
        }

        // First quad, bytes 2 to 4, then the byte following it
        int next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 1); // 1-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 2); // 2-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 3); // 3-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 4); // 4-byte name
        }

        // One full quad plus one byte so far; same again for the second quad
        int quad2 = next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, quad2, 1);
        }
        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, quad2, 2);
        }
        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, quad2, 3);
        }
        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, quad2, 4);
        }

        // More than two quads: hand over to the generic loop
        int[] quads = mQuadBuffer;
        quads[0] = quad1;
        quads[1] = quad2;
        return parsePNameNewLong(next, quads);
    }

    /**
     * Name parser variant that checks once that at least 8 more bytes are
     * available, reads the first quad without per-byte bounds checks and
     * leaves anything longer to {@link #parsePNameNewMedium(int, int)}.
     * With fewer bytes left it defers to {@link #parsePName(byte)}.
     * <p>
     * A byte ends the name when it is below 65 and is not one of '-' (45),
     * '.' (46), '0'-'9' (48-57) or ':' (58).
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the name found in, or added to, the symbol table
     */
    protected ByteBasedPName parsePNameNew2(byte b)
        throws XMLStreamException
    {
        // Got 1 byte; the unrolled code needs 7 more plus one trailing byte
        if ((mInputLen - mInputPtr) < 8) {
            return parsePName(b);
        }

        int quad1 = b & 0xFF;
        // 'A' is the lowest start byte accepted (':' would only be allowed in non-ns mode)
        if (quad1 < INT_A) {
            reportError("; expected a name start character");
        }

        // First quad, bytes 2 to 4, then the byte following it
        int next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 1); // 1-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 2); // 2-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 3); // 3-byte name
        }
        quad1 = (quad1 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(quad1, 4); // 4-byte name
        }

        // Longer than one quad: continue out of line
        return parsePNameNewMedium(next, quad1);
    }

    /**
     * Continues a name after its first complete quad: reads the rest of the
     * second quad without bounds checks of its own and passes names longer
     * than two quads on to {@link #parsePNameNewLong(int, int[])}.
     *
     * @param i2 first byte of the second quad (already read and accepted)
     * @param q1 the complete first quad
     * @return the name found in, or added to, the symbol table
     */
    protected ByteBasedPName parsePNameNewMedium(int i2, int q1)
        throws XMLStreamException
    {
        // One full quad plus one byte so far
        int quad2 = i2;
        int next = mInputBuffer[mInputPtr++] & 0xFF;
        // Below 65 only '-' (45), '.' (46), '0'-'9' and ':' (48-58) continue a name
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(q1, quad2, 1);
        }

        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(q1, quad2, 2);
        }
        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(q1, quad2, 3);
        }
        quad2 = (quad2 << 8) | next;
        next = mInputBuffer[mInputPtr++] & 0xFF;
        if (next < 65 && (next < 45 || next > 58 || next == 47)) {
            return findPName(q1, quad2, 4);
        }

        // More than two quads: hand over to the generic loop
        int[] quads = mQuadBuffer;
        quads[0] = q1;
        quads[1] = quad2;
        return parsePNameNewLong(next, quads);
    }

    /**
     * Generic, bounds-checked loop for names longer than two quads.
     *
     * @param q first byte of the third quad (already read and accepted)
     * @param quads buffer whose first two entries hold the first two quads
     * @return the name found in, or added to, the symbol table
     */
    protected ByteBasedPName parsePNameNewLong(int q, int[] quads)
        throws XMLStreamException
    {
        int quadCount = 2; // complete quads stored in 'quads' so far
        for (;;) {
            // 2nd byte of the current quad
            if (mInputPtr >= mInputLen) {
                loadMoreGuaranteed();
            }
            int next = mInputBuffer[mInputPtr++] & 0xFF;
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 1 byte
                return findPName(q, quads, quadCount, 1);
            }

            // 3rd byte
            q = (q << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 2 bytes
                return findPName(q, quads, quadCount, 2);
            }

            // 4th byte
            q = (q << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad holds 3 bytes
                return findPName(q, quads, quadCount, 3);
            }

            // 1st byte of the following quad
            q = (q << 8) | next;
            if (mInputPtr < mInputLen) {
                next = mInputBuffer[mInputPtr++] & 0xFF;
            } else {
                next = loadOne() & 0xFF;
            }
            if (next < 65 && (next < 45 || next > 58 || next == 47)) {
                // name ended; current quad is full (4 bytes)
                return findPName(q, quads, quadCount, 4);
            }

            // Quad complete and the name goes on: store it, growing if needed
            if (quadCount >= quads.length) {
                mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
            }
            quads[quadCount++] = q;
            q = next;
        }
    }

    /**
     * Resolves a name that fits in a single quad: pushes back the byte that
     * ended the name, looks the quad up in the symbol table and adds a new
     * entry if there is none.
     *
     * @param onlyQuad the name's bytes, packed into one int
     * @param lastByteCount number of name bytes held in {@code onlyQuad}
     * @return the existing or newly added name
     */
    private final ByteBasedPName findPName(int onlyQuad, int lastByteCount)
        throws XMLStreamException
    {
        // The byte that terminated the name was consumed; un-read it
        mInputPtr -= 1;

        final int hash = ByteBasedPNameTable.calcHash(onlyQuad);
        final ByteBasedPName known = mSymbols.findSymbol(hash, onlyQuad, 0);
        if (known != null) {
            return known;
        }
        // Not seen before: go through the array-based add
        final int[] scratch = mQuadBuffer;
        scratch[0] = onlyQuad;
        return addPName(hash, scratch, 1, lastByteCount);
    }

    /**
     * Resolves a name that takes exactly two quads: pushes back the byte
     * that ended the name, looks the pair up in the symbol table and adds a
     * new entry if there is none.
     *
     * @param firstQuad first four bytes of the name
     * @param secondQuad remaining bytes of the name
     * @param lastByteCount number of name bytes held in {@code secondQuad}
     * @return the existing or newly added name
     */
    private final ByteBasedPName findPName(int firstQuad, int secondQuad,
                                  int lastByteCount)
        throws XMLStreamException
    {
        // The byte that terminated the name was consumed; un-read it
        mInputPtr -= 1;

        final int hash = ByteBasedPNameTable.calcHash(firstQuad, secondQuad);
        final ByteBasedPName known = mSymbols.findSymbol(hash, firstQuad, secondQuad);
        if (known != null) {
            return known;
        }
        // Not seen before: go through the array-based add
        final int[] scratch = mQuadBuffer;
        scratch[0] = firstQuad;
        scratch[1] = secondQuad;
        return addPName(hash, scratch, 2, lastByteCount);
    }

    /**
     * Resolves a long name whose leading quads are already in
     * {@code quads}: pushes back the byte that ended the name, appends the
     * last quad, looks the array up in the symbol table and adds a new entry
     * if there is none.
     *
     * @param lastQuad the final, possibly partial, quad (not yet in the array)
     * @param quads buffer holding the preceding {@code qlen} quads
     * @param qlen number of quads already stored in {@code quads}
     * @param lastByteCount number of name bytes held in {@code lastQuad}
     * @return the existing or newly added name
     */
    private final ByteBasedPName findPName(int lastQuad, int[] quads, int qlen, int lastByteCount)
        throws XMLStreamException
    {
        // The byte that terminated the name was consumed; un-read it
        mInputPtr -= 1;

        // The last quad still has to go into the array; grow it if full
        if (qlen >= quads.length) {
            mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
        }
        quads[qlen] = lastQuad;
        final int total = qlen + 1;

        final int hash = ByteBasedPNameTable.calcHash(quads, total);
        final ByteBasedPName known = mSymbols.findSymbol(hash, quads, total);
        if (known != null) {
            return known;
        }
        return addPName(hash, quads, total, lastByteCount);
    }

    /**
     * Resolves a name parsed by {@link #parsePName(byte)}: pushes back the
     * byte that ended the name, then looks the name up in the symbol table,
     * adding a new entry if there is none. Names of one and two quads are
     * looked up by value; longer ones through the quad array.
     *
     * @param lastQuad the final, possibly partial, quad
     * @param lastByteCount number of name bytes held in {@code lastQuad}
     * @param firstQuad the first quad; only used when {@code qlen} is 1
     * @param qlen number of complete quads preceding {@code lastQuad}
     * @param quads buffer holding the preceding quads when {@code qlen} is 2 or more
     * @return the existing or newly added name
     */
    private final ByteBasedPName findPName(int lastQuad, int lastByteCount, int firstQuad,
                                  int qlen, int[] quads)
        throws XMLStreamException
    {
        // The byte that terminated the name was consumed; un-read it
        mInputPtr -= 1;

        // Single quad: 4 bytes or fewer, only 'lastQuad' carries data
        if (qlen == 0) {
            final int hash = ByteBasedPNameTable.calcHash(lastQuad, 0);
            final ByteBasedPName known = mSymbols.findSymbol(hash, lastQuad, 0);
            if (known != null) {
                return known;
            }
            // Not seen before: go through the array-based add
            final int[] scratch = mQuadBuffer;
            scratch[0] = lastQuad;
            return addPName(hash, scratch, 1, lastByteCount);
        }

        // Two quads
        if (qlen <= 1) {
            final int hash = ByteBasedPNameTable.calcHash(firstQuad, lastQuad);
            final ByteBasedPName known = mSymbols.findSymbol(hash, firstQuad, lastQuad);
            if (known != null) {
                return known;
            }
            final int[] scratch = mQuadBuffer;
            scratch[0] = firstQuad;
            scratch[1] = lastQuad;
            return addPName(hash, scratch, 2, lastByteCount);
        }

        // Three quads or more: the last one still has to go into the array
        if (qlen >= quads.length) {
            mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
        }
        quads[qlen] = lastQuad;
        final int total = qlen + 1;

        final int hash = ByteBasedPNameTable.calcHash(quads, total);
        final ByteBasedPName known = mSymbols.findSymbol(hash, quads, total);
        if (known != null) {
            return known;
        }
        return addPName(hash, quads, total, lastByteCount);
    }

    /**
     * Decodes the UTF-8 bytes packed in {@code quads} into characters,
     * validates them as an XML name and registers the result in the symbol
     * table.
     * <p>
     * Byte {@code k} of the name is taken from {@code quads[k >> 2]}, most
     * significant byte first. A partial last quad is shifted left for the
     * duration of the decoding so that it follows the same layout, and is
     * put back to its original value before the symbol is added.
     * Validation failures go through {@code reportError}, which in this
     * class always throws {@code IllegalStateException}.
     *
     * @param hash hash of the name, passed through to the symbol table
     * @param quads the name's bytes, four per int
     * @param qlen number of entries of {@code quads} in use
     * @param lastQuadBytes number of name bytes held in the last quad (1 to 4)
     * @return the value returned by the symbol table's {@code addSymbol}
     */
    protected final ByteBasedPName addPName(int hash, int[] quads, int qlen, int lastQuadBytes)
        throws XMLStreamException
    {
        // 4 bytes per quad, except last one maybe less
        int byteLen = (qlen << 2) - 4 + lastQuadBytes;

        /* A partial last quad has its bytes in the low-order positions
         * (leading zero bytes), whereas decoding below expects them at the
         * top. Shift it for UTF-8 decoding only; the original value is
         * restored before storage, since the stored key is kept unshifted.
         */
        int lastQuad;

        if (lastQuadBytes < 4) {
            lastQuad = quads[qlen-1];
            // 8/16/24 bit left shift
            quads[qlen-1] = (lastQuad << ((4 - lastQuadBytes) << 3));
        } else {
            lastQuad = 0;
        }

        // Let's handle first char separately (different validation):
        int ch = (quads[0] >>> 24);
        boolean ok;
        int ix = 1; // index of the next name byte to decode
        char[] cbuf = mNameBuffer;
        int cix  = 0; // number of chars written to cbuf
        final int[] TYPES = mCharTypes.NAME_CHARS;

        switch (TYPES[ch]) {
        case XmlCharTypes.CT_NAME_NONE:
        case XmlCharTypes.CT_NAME_COLON: // not ok as first
        case XmlCharTypes.CT_NAME_NONFIRST:
        case InputCharTypes.CT_INPUT_NAME_MB_N:
            ok = false;
            break;
        case XmlCharTypes.CT_NAME_ANY:
            ok = true;
            break;
        default: // multi-byte (UTF-8) chars:
            {
                int needed; // number of continuation bytes that must follow
                
                if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                    ch &= 0x1F;
                    needed = 1;
                } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                    ch &= 0x0F;
                    needed = 2;
                } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all...
                    ch &= 0x07;
                    needed = 3;
                } else { // 5- and 6-byte chars not valid xml chars
                    reportError(ch);
                    needed = ch = 1; // never really gets this far
                }
                // Sequence must not run past the end of the name
                if ((ix + needed) > byteLen) {
                    reportError(ch);
                }
                ix += needed;
                
                // The whole first character lives in the first quad
                int q = quads[0];
                // Always need at least one more right away:
                int ch2 = (q >> 16) & 0xFF;
                if ((ch2 & 0xC0) != 0x080) { // not a 10xxxxxx continuation byte
                    reportError(ch2);
                }
                ch = (ch << 6) | (ch2 & 0x3F);
                
                /* And then may need more. Note: here we do not do all the
                 * checks that UTF-8 text decoder might do. Reason is that
                 * name validity checking methods handle most of such checks
                 */
                if (needed > 1) {
                    ch2 = (q >> 8) & 0xFF;
                    if ((ch2 & 0xC0) != 0x080) {
                        reportError(ch2);
                    }
                    ch = (ch << 6) | (ch2 & 0x3F);
                    if (needed > 2) { // 4 bytes? (need surrogates on output)
                        ch2 = q & 0xFF;
                        if ((ch2 & 0xC0) != 0x080) {
                            reportError(ch2 & 0xFF);
                        }
                        ch = (ch << 6) | (ch2 & 0x3F);
                    }
                }
                ok = XmlChars.is10NameStartChar(ch);
                if (needed > 2) { // outside of basic 16-bit range? need surrogates
                    /* so, let's first output first char (high surrogate),
                     * let second be output by later code
                     */
                    ch -= 0x10000; // to normalize it starting with 0x0
                    cbuf[cix++] = (char) (0xD800 + (ch >> 10));
                    ch = (0xDC00 | (ch & 0x03FF));
                }
            }
        }

        if (!ok) { // first character is not a valid name start character
            reportError(ch);
        }

        cbuf[cix++] = (char) ch; // the only char, or second (low) surrogate

        /* Whoa! Tons of code for just the start char. But now we get to
         * decode the name proper, at last!
         */
        int last_colon = -1; // char index of the colon seen, -1 if none

        for (; ix < byteLen; ) {
            ch = quads[ix >> 2]; // current quad, need to shift+mask
            int byteIx = (ix & 3);
            ch = (ch >> ((3 - byteIx) << 3)) & 0xFF;
            ++ix;

            // Ascii?
            switch (TYPES[ch]) {
            case XmlCharTypes.CT_NAME_NONE:
            case XmlCharTypes.CT_MULTIBYTE_N:
                ok = false;
                break;
            case XmlCharTypes.CT_NAME_COLON: // at most one colon is accepted
                if (last_colon >= 0) {
                    reportError(0);
                }
                last_colon = cix;
                ok = true;
                break;
            case XmlCharTypes.CT_NAME_NONFIRST:
            case XmlCharTypes.CT_NAME_ANY:
                ok = true;
                break;
            default: // multi-byte (UTF-8) chars
                {
                    int needed; // number of continuation bytes that must follow
                    if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                        ch &= 0x1F;
                        needed = 1;
                    } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                        ch &= 0x0F;
                        needed = 2;
                    } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all...
                        ch &= 0x07;
                        needed = 3;
                    } else { // 5- and 6-byte chars not valid xml chars
                        reportError(ch);
                        needed = ch = 1; // never really gets this far
                    }
                    // Sequence must not run past the end of the name
                    if ((ix + needed) > byteLen) {
                        reportError(cix);
                    }
                    
                    // Ok, always need at least one more:
                    int ch2 = quads[ix >> 2]; // current quad, need to shift+mask
                    byteIx = (ix & 3);
                    // Not masked to 8 bits here; only the low bits are tested and used below
                    ch2 = (ch2 >> ((3 - byteIx) << 3));
                    ++ix;
                    
                    if ((ch2 & 0xC0) != 0x080) { // not a 10xxxxxx continuation byte
                        reportError(ch2);
                    }
                    ch = (ch << 6) | (ch2 & 0x3F);
                    
                    // Once again, some of validation deferred to name char validator
                    if (needed > 1) {
                        ch2 = quads[ix >> 2];
                        byteIx = (ix & 3);
                        ch2 = (ch2 >> ((3 - byteIx) << 3));
                        ++ix;
                        
                        if ((ch2 & 0xC0) != 0x080) {
                            reportError(ch2);
                        }
                        ch = (ch << 6) | (ch2 & 0x3F);
                        if (needed > 2) { // 4 bytes? (need surrogates on output)
                            ch2 = quads[ix >> 2];
                            byteIx = (ix & 3);
                            ch2 = (ch2 >> ((3 - byteIx) << 3));
                            ++ix;
                            if ((ch2 & 0xC0) != 0x080) {
                                reportError(ch2 & 0xFF);
                            }
                            ch = (ch << 6) | (ch2 & 0x3F);
                        }
                    }
                    ok = XmlChars.is10NameChar(ch);
                    if (needed > 2) { // surrogate pair? once again, let's output one here, one later on
                        ch -= 0x10000; // to normalize it starting with 0x0
                        if (cix >= cbuf.length) {
                            mNameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
                        }
                        cbuf[cix++] = (char) (0xD800 + (ch >> 10));
                        ch = 0xDC00 | (ch & 0x03FF);
                    }
                }
            }
            if (!ok) { // character not allowed in a name
                reportError(cix);
            }
            if (cix >= cbuf.length) {
                mNameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
            }
            cbuf[cix++] = (char) ch;
        }

        /* Ok. Now we have the character array, and can construct the
         * String; the colon position found above is handed to the symbol
         * table along with it (for ns-aware mode...)
         */
        String baseName = new String(cbuf, 0, cix);
        // And finally, restore the unshifted last quad if it was shifted above
        if (lastQuadBytes < 4) {
            quads[qlen-1] = lastQuad;
        }
        return mSymbols.addSymbol(hash, baseName, last_colon, quads, qlen);
    }

    /**
     * Stand-in for a buffer refill. The parse methods call this when
     * {@code mInputPtr} has reached {@code mInputLen} in the middle of a
     * name; this class has nothing further to load, so it always fails.
     *
     * @throws IllegalStateException always
     */
    private void loadMoreGuaranteed()
    {
        throw new IllegalStateException("Unexpected end of input inside a name (offset "
                +mInputPtr+" of "+mInputLen+")");
    }

    /**
     * Stand-in for loading a single further byte. The parse methods call
     * this when {@code mInputPtr} has reached {@code mInputLen} in the
     * middle of a name; this class has nothing further to load, so it
     * always fails and never actually returns a value.
     *
     * @return never returns normally
     * @throws IllegalStateException always
     */
    private int loadOne()
    {
        throw new IllegalStateException("Unexpected end of input inside a name (offset "
                +mInputPtr+" of "+mInputLen+")");
    }

    /**
     * Reports a name decoding or validation failure by throwing.
     *
     * @param arg value supplied by the failing check; depending on the call
     *   site it is a byte or character code, an output index, or 0
     * @throws IllegalStateException always
     */
    private void reportError(int arg)
    {
        throw new IllegalStateException("Invalid name (error argument "+arg
                +", input offset "+mInputPtr+")");
    }

    /**
     * Reports a parsing failure by throwing.
     *
     * @param msg text used as the exception message
     * @throws IllegalStateException always
     */
    private void reportError(String msg)
    {
        final IllegalStateException failure = new IllegalStateException(msg);
        throw failure;
    }

    /**
     * Reads the whole file into a byte array.
     *
     * @param f file to read
     * @return the file's contents; its length is the file length reported
     *   when reading started
     * @throws IOException if the file cannot be opened or read, is too large
     *   for a single array, or ends before the reported length was read
     */
    private static byte[] readData(File f)
        throws IOException
    {
        final long fileLength = f.length();
        if (fileLength > Integer.MAX_VALUE) {
            throw new IOException("File '"+f+"' is too large to read into one array ("
                    +fileLength+" bytes)");
        }
        final int len = (int) fileLength;
        final byte[] data = new byte[len];

        FileInputStream fis = new FileInputStream(f);
        try {
            int offset = 0;
            // read() may deliver less than asked for: keep going until full
            while (offset < len) {
                int count = fis.read(data, offset, len - offset);
                if (count < 0) { // file ended (shrank) before 'len' bytes were read
                    throw new EOFException("Unexpected end of file '"+f+"': got "
                            +offset+" bytes, expected "+len);
                }
                offset += count;
            }
        } finally {
            fis.close();
        }
        return data;
    }

    /**
     * Command-line entry point: reads the file named by the single
     * argument, picks a repetition count so that each timed round covers
     * about 10 million bytes (one pass for inputs at least that large) and
     * starts the timing loop.
     *
     * @param args exactly one element, the path of the input file
     * @throws Exception if reading the file or running the test fails
     */
    public static void main(String[] args)
        throws Exception
    {
        if (args.length != 1) {
            System.err.println("Usage: java ... [input file]");
            System.exit(1);
        }
        byte[] data = readData(new File(args[0]));
        int len = data.length;
        // An empty input has nothing to parse and would divide by zero below
        if (len == 0) {
            System.err.println("Input file '"+args[0]+"' is empty; nothing to measure");
            System.exit(1);
        }
        int repCount = 1;

        int THRESHOLD = 10 * 1000 * 1000;

        if (len < THRESHOLD) {
            repCount = (THRESHOLD / len);
        }

        System.out.println("Ok, read in test data, "+len+" bytes; using "+repCount+" repetitions");
        new TestPNamePerf(data, repCount).test();
    }
}
/

com.fasterxml/ aalto-xml/ 1.2.2/ test/TestPNamePerf.java