/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.message;

import java.util.BitSet;

import org.apache.http.annotation.ThreadingBehavior;
import org.apache.http.annotation.Contract;
import org.apache.http.util.CharArrayBuffer;

Low level parser for header field elements. The parsing routines of this class are designed to produce near zero intermediate garbage and make no intermediate copies of input data.

This class is immutable and thread safe.

Since:4.4
/** * Low level parser for header field elements. The parsing routines of this class are designed * to produce near zero intermediate garbage and make no intermediate copies of input data. * <p> * This class is immutable and thread safe. * * @since 4.4 */
@Contract(threading = ThreadingBehavior.IMMUTABLE) public class TokenParser { public static BitSet INIT_BITSET(final int ... b) { final BitSet bitset = new BitSet(); for (final int aB : b) { bitset.set(aB); } return bitset; }
US-ASCII CR, carriage return (13)
/** US-ASCII CR, carriage return (13) */
public static final char CR = '\r';
US-ASCII LF, line feed (10)
/** US-ASCII LF, line feed (10) */
public static final char LF = '\n';
US-ASCII SP, space (32)
/** US-ASCII SP, space (32) */
public static final char SP = ' ';
US-ASCII HT, horizontal-tab (9)
/** US-ASCII HT, horizontal-tab (9) */
public static final char HT = '\t';
Double quote
/** Double quote */
public static final char DQUOTE = '\"';
Backward slash / escape character
/** Backward slash / escape character */
public static final char ESCAPE = '\\'; public static boolean isWhitespace(final char ch) { return ch == SP || ch == HT || ch == CR || ch == LF; } public static final TokenParser INSTANCE = new TokenParser();
Extracts from the sequence of chars a token terminated with any of the given delimiters discarding semantically insignificant whitespace characters.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
  • delimiters – set of delimiting characters. Can be null if the token is not delimited by any character.
/** * Extracts from the sequence of chars a token terminated with any of the given delimiters * discarding semantically insignificant whitespace characters. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the token * is not delimited by any character. */
public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) { final StringBuilder dst = new StringBuilder(); boolean whitespace = false; while (!cursor.atEnd()) { final char current = buf.charAt(cursor.getPos()); if (delimiters != null && delimiters.get(current)) { break; } else if (isWhitespace(current)) { skipWhiteSpace(buf, cursor); whitespace = true; } else { if (whitespace && dst.length() > 0) { dst.append(' '); } copyContent(buf, cursor, delimiters, dst); whitespace = false; } } return dst.toString(); }
Extracts from the sequence of chars a value which can be enclosed in quote marks and terminated with any of the given delimiters discarding semantically insignificant whitespace characters.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
  • delimiters – set of delimiting characters. Can be null if the value is not delimited by any character.
/** * Extracts from the sequence of chars a value which can be enclosed in quote marks and * terminated with any of the given delimiters discarding semantically insignificant * whitespace characters. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is not delimited by any character. */
public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) { final StringBuilder dst = new StringBuilder(); boolean whitespace = false; while (!cursor.atEnd()) { final char current = buf.charAt(cursor.getPos()); if (delimiters != null && delimiters.get(current)) { break; } else if (isWhitespace(current)) { skipWhiteSpace(buf, cursor); whitespace = true; } else if (current == DQUOTE) { if (whitespace && dst.length() > 0) { dst.append(' '); } copyQuotedContent(buf, cursor, dst); whitespace = false; } else { if (whitespace && dst.length() > 0) { dst.append(' '); } copyUnquotedContent(buf, cursor, delimiters, dst); whitespace = false; } } return dst.toString(); }
Skips semantically insignificant whitespace characters and moves the cursor to the closest non-whitespace character.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
/** * Skips semantically insignificant whitespace characters and moves the cursor to the closest * non-whitespace character. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer */
public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if (!isWhitespace(current)) { break; } pos++; } cursor.updatePos(pos); }
Transfers content into the destination buffer until a whitespace character or any of the given delimiters is encountered.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
  • delimiters – set of delimiting characters. Can be null if the value is delimited by a whitespace only.
  • dst – destination buffer
/** * Transfers content into the destination buffer until a whitespace character or any of * the given delimiters is encountered. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is delimited by a whitespace only. * @param dst destination buffer */
public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters, final StringBuilder dst) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) { break; } pos++; dst.append(current); } cursor.updatePos(pos); }
Transfers content into the destination buffer until a whitespace character, a quote, or any of the given delimiters is encountered.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
  • delimiters – set of delimiting characters. Can be null if the value is delimited by a whitespace or a quote only.
  • dst – destination buffer
/** * Transfers content into the destination buffer until a whitespace character, a quote, * or any of the given delimiters is encountered. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param delimiters set of delimiting characters. Can be {@code null} if the value * is delimited by a whitespace or a quote only. * @param dst destination buffer */
public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters, final StringBuilder dst) { int pos = cursor.getPos(); final int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); for (int i = indexFrom; i < indexTo; i++) { final char current = buf.charAt(i); if ((delimiters != null && delimiters.get(current)) || isWhitespace(current) || current == DQUOTE) { break; } pos++; dst.append(current); } cursor.updatePos(pos); }
Transfers content enclosed with quote marks into the destination buffer.
Params:
  • buf – buffer with the sequence of chars to be parsed
  • cursor – defines the bounds and current position of the buffer
  • dst – destination buffer
/** * Transfers content enclosed with quote marks into the destination buffer. * * @param buf buffer with the sequence of chars to be parsed * @param cursor defines the bounds and current position of the buffer * @param dst destination buffer */
public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor, final StringBuilder dst) { if (cursor.atEnd()) { return; } int pos = cursor.getPos(); int indexFrom = cursor.getPos(); final int indexTo = cursor.getUpperBound(); char current = buf.charAt(pos); if (current != DQUOTE) { return; } pos++; indexFrom++; boolean escaped = false; for (int i = indexFrom; i < indexTo; i++, pos++) { current = buf.charAt(i); if (escaped) { if (current != DQUOTE && current != ESCAPE) { dst.append(ESCAPE); } dst.append(current); escaped = false; } else { if (current == DQUOTE) { pos++; break; } if (current == ESCAPE) { escaped = true; } else if (current != CR && current != LF) { dst.append(current); } } } cursor.updatePos(pos); } }