/*
 * Artifact: com.fasterxml.jackson.dataformat/jackson-dataformat-smile/2.13.1
 * Source file: com/fasterxml/jackson/dataformat/smile/SmileParserBootstrapper.java
 *
 * SmileParserBootstrapper
 * http://github.com/FasterXML/jackson-dataformats-binary: Support for reading and writing Smile ("binary JSON") encoded data using Jackson abstractions (streaming API, data binding, tree model) (FasterXML)
 * The Apache Software License, Version 2.0
 * Tatu Saloranta
 */
package com.fasterxml.jackson.dataformat.smile;

import java.io.*;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.format.InputAccessor;
import com.fasterxml.jackson.core.format.MatchStrength;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;

import static com.fasterxml.jackson.dataformat.smile.SmileConstants.*;

Simple bootstrapper version used with Smile format parser.
/**
 * Simple bootstrapper version used with Smile format parser.
 */
public class SmileParserBootstrapper
{
    /*
    /**********************************************************************
    /* Configuration
    /**********************************************************************
     */

    protected final IOContext _context;

    protected final InputStream _in;
    
    /*
    /**********************************************************************
    /* Input buffering
    /**********************************************************************
     */

    protected final byte[] _inputBuffer;

    protected int _inputPtr;

    protected int _inputEnd;

    Flag that indicates whether buffer above is to be recycled
after being used or not.
/**
     * Flag that indicates whether buffer above is to be recycled
     * after being used or not.
     */
    protected final boolean _bufferRecyclable;

    /*
    /**********************************************************************
    /* Input location
    /**********************************************************************
     */

    Current number of input units (bytes or chars) that were processed in
previous blocks,
before contents of current input buffer.

Note: includes possible BOMs, if those were part of the input.
/**
     * Current number of input units (bytes or chars) that were processed in
     * previous blocks,
     * before contents of current input buffer.
     *<p>
     * Note: includes possible BOMs, if those were part of the input.
     */
    protected int _inputProcessed;

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    public SmileParserBootstrapper(IOContext ctxt, InputStream in)
    {
        _context = ctxt;
        _in = in;
        _inputBuffer = ctxt.allocReadIOBuffer();
        _inputEnd = _inputPtr = 0;
        _inputProcessed = 0;
        _bufferRecyclable = true;
    }

    public SmileParserBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen)
    {
        _context = ctxt;
        _in = null;
        _inputBuffer = inputBuffer;
        _inputPtr = inputStart;
        _inputEnd = (inputStart + inputLen);
        // Need to offset this for correct location info
        _inputProcessed = -inputStart;
        _bufferRecyclable = false;
    }

    public SmileParser constructParser(int factoryFeatures,
            int generalParserFeatures, int smileFeatures,
            ObjectCodec codec, ByteQuadsCanonicalizer rootByteSymbols)
        throws IOException, JsonParseException
    {
        // 13-Mar-2021, tatu: [dataformats-binary#252] Create canonicalizing OR
        //    placeholder, depending on settings
        ByteQuadsCanonicalizer can = rootByteSymbols.makeChildOrPlaceholder(factoryFeatures);
        // We just need a single byte, really, to know if it starts with header
        int end = _inputEnd;
        if ((_inputPtr < end) && (_in != null)) {
            int count = _in.read(_inputBuffer, end, _inputBuffer.length - end);
            if (count > 0) {
                _inputEnd += count;
            }
        }

        SmileParser p = new SmileParser(_context, generalParserFeatures, smileFeatures,
                codec, can, 
                _in, _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
        boolean hadSig = false;

        if (_inputPtr >= _inputEnd) { // only the case for empty doc
            // 11-Oct-2012, tatu: Actually, let's allow empty documents even if
            //   header signature would otherwise be needed. This is useful for
            //   JAX-RS provider, empty PUT/POST payloads.
            return p;
        }
        final byte firstByte = _inputBuffer[_inputPtr];
        if (firstByte == SmileConstants.HEADER_BYTE_1) {
            // need to ensure it gets properly handled so caller won't see the signature
            hadSig = p.handleSignature(true, true);
        }

        if (!hadSig && SmileParser.Feature.REQUIRE_HEADER.enabledIn(smileFeatures)) {
            // Ok, first, let's see if it looks like plain JSON...
            String msg;

            if (firstByte == '{' || firstByte == '[') {
                msg = "Input does not start with Smile format header (first byte = 0x"
                    +Integer.toHexString(firstByte & 0xFF)+") -- rather, it starts with '"+((char) firstByte)
                    +"' (plain JSON input?) -- can not parse";
            } else {
                msg = "Input does not start with Smile format header (first byte = 0x"
                +Integer.toHexString(firstByte & 0xFF)+") and parser has REQUIRE_HEADER enabled: can not parse";
            }
            throw new JsonParseException(p, msg);
        }
        return p;
    }

    /*
    /**********************************************************************
    /*  Encoding detection for data format auto-detection
    /**********************************************************************
     */

    public static MatchStrength hasSmileFormat(InputAccessor acc) throws IOException
    {
        // Ok: ideally we start with the header -- if so, we are golden
        if (!acc.hasMoreBytes()) {
            return MatchStrength.INCONCLUSIVE;
        }
        // We always need at least two bytes to determine, so
        byte b1 = acc.nextByte();
        if (!acc.hasMoreBytes()) {
            return MatchStrength.INCONCLUSIVE;
        }
        byte b2 = acc.nextByte();
        
        // First: do we see 3 "magic bytes"? If so, we are golden
        if (b1 == SmileConstants.HEADER_BYTE_1) { // yeah, looks like marker
            if (b2 != SmileConstants.HEADER_BYTE_2) {
                return MatchStrength.NO_MATCH;
            }
            if (!acc.hasMoreBytes()) {
                return MatchStrength.INCONCLUSIVE;
            }
            return (acc.nextByte() == SmileConstants.HEADER_BYTE_3) ?
                    MatchStrength.FULL_MATCH : MatchStrength.NO_MATCH;
        }
        // Otherwise: ideally either Object or Array:
        if (b1 == SmileConstants.TOKEN_LITERAL_START_OBJECT) {
            /* Object is bit easier, because now we need to get new name; i.e. can
             * rule out name back-refs
             */
            if (b2 == SmileConstants.TOKEN_KEY_LONG_STRING) {
                return MatchStrength.SOLID_MATCH;
            }
            int ch = (int) b2 & 0xFF;
            if (ch >= 0x80 && ch < 0xF8) {
                return MatchStrength.SOLID_MATCH;
            }
            return MatchStrength.NO_MATCH;
        }
        // Array bit trickier
        if (b1 == SmileConstants.TOKEN_LITERAL_START_ARRAY) {
            if (!acc.hasMoreBytes()) {
                return MatchStrength.INCONCLUSIVE;
            }
            /* For arrays, we will actually accept much wider range of values (including
             * things that could otherwise collide)
             */
            if (likelySmileValue(b2) || possibleSmileValue(b2, true)) {
                return MatchStrength.SOLID_MATCH;
            }
            return MatchStrength.NO_MATCH;
        }
        // Scalar values are pretty weak, albeit possible; require more certain match, consider it weak:
        if (likelySmileValue(b1) || possibleSmileValue(b2, false)) {
            return MatchStrength.SOLID_MATCH;
        }
        return MatchStrength.NO_MATCH;
    }

    private static boolean likelySmileValue(byte b)
    {
        if (   (b == TOKEN_MISC_LONG_TEXT_ASCII) // 0xE0
            || (b == TOKEN_MISC_LONG_TEXT_UNICODE) // 0xE4
            || (b == TOKEN_MISC_BINARY_7BIT) // 0xE8
            || (b == TOKEN_LITERAL_START_ARRAY) // 0xF8
            || (b == TOKEN_LITERAL_START_OBJECT) // 0xFA
            ) {
            return true;
        }
        int ch = b & 0xFF;
        // ASCII ctrl char range is pretty good match too
        if (ch >= 0x80 && ch <= 0x9F) {
            return true;
        }
        return false;
    }

    Params: lenient – Whether to consider more speculative matches or not
  (typically true when there is context like start-array)/**
     * @param lenient Whether to consider more speculative matches or not
     *   (typically true when there is context like start-array)
     */
    private static boolean possibleSmileValue(byte b, boolean lenient)
    {
        int ch = (int) b & 0xFF;
        // note: we know that likely matches have been handled already, so...
        if (ch >= 0x80) {
            return (ch <= 0xE0);
        }
        if (lenient) {
            if (ch >= 0x40) { // tiny/short ASCII
                return true;
            }
            if (ch >= 0x20) { // various constants
                return (ch < 0x2C); // many reserved bytes that can't be seen
            }
        }
        return false;
    }
}
// com.fasterxml.jackson.dataformat/jackson-dataformat-smile/2.13.1/com/fasterxml/jackson/dataformat/smile/SmileParserBootstrapper.java