/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.miscellaneous;


import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

import java.io.IOException;

This TokenFilter limits the number of tokens while indexing. It is a replacement for the maximum field length setting inside IndexWriter.

By default, this filter ignores any tokens in the wrapped TokenStream once the limit has been reached, which can result in reset() being called prior to incrementToken() returning false. For most TokenStream implementations this should be acceptable, and faster then consuming the full stream. If you are wrapping a TokenStream which requires that the full stream of tokens be exhausted in order to function properly, use the consumeAllTokens option.

/** * This TokenFilter limits the number of tokens while indexing. It is * a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}. * <p> * By default, this filter ignores any tokens in the wrapped {@code TokenStream} * once the limit has been reached, which can result in {@code reset()} being * called prior to {@code incrementToken()} returning {@code false}. For most * {@code TokenStream} implementations this should be acceptable, and faster * then consuming the full stream. If you are wrapping a {@code TokenStream} * which requires that the full stream of tokens be exhausted in order to * function properly, use the * {@link #LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens} * option. */
public final class LimitTokenCountFilter extends TokenFilter { private final int maxTokenCount; private final boolean consumeAllTokens; private int tokenCount = 0; private boolean exhausted = false;
Build a filter that only accepts tokens up to a maximum number. This filter will not consume any tokens beyond the maxTokenCount limit
See Also:
  • LimitTokenCountFilter(TokenStream, int, boolean)
/** * Build a filter that only accepts tokens up to a maximum number. * This filter will not consume any tokens beyond the maxTokenCount limit * * @see #LimitTokenCountFilter(TokenStream,int,boolean) */
public LimitTokenCountFilter(TokenStream in, int maxTokenCount) { this(in, maxTokenCount, false); }
Build an filter that limits the maximum number of tokens per field.
Params:
  • in – the stream to wrap
  • maxTokenCount – max number of tokens to produce
  • consumeAllTokens – whether all tokens from the input must be consumed even if maxTokenCount is reached.
/** * Build an filter that limits the maximum number of tokens per field. * @param in the stream to wrap * @param maxTokenCount max number of tokens to produce * @param consumeAllTokens whether all tokens from the input must be consumed even if maxTokenCount is reached. */
public LimitTokenCountFilter(TokenStream in, int maxTokenCount, boolean consumeAllTokens) { super(in); if (maxTokenCount < 1) { throw new IllegalArgumentException("maxTokenCount must be greater than zero"); } this.maxTokenCount = maxTokenCount; this.consumeAllTokens = consumeAllTokens; } @Override public boolean incrementToken() throws IOException { if (exhausted) { return false; } else if (tokenCount < maxTokenCount) { if (input.incrementToken()) { tokenCount++; return true; } else { exhausted = true; return false; } } else { while (consumeAllTokens && input.incrementToken()) { /* NOOP */ } return false; } } @Override public void reset() throws IOException { super.reset(); tokenCount = 0; exhausted = false; } }