/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.miscellaneous;

import java.io.IOException;
import java.util.function.Function;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;

Allows skipping TokenFilters based on the current set of attributes. To use, implement the shouldFilter() method. If it returns true, then calling incrementToken() will use the wrapped TokenFilter(s) to make changes to the tokenstream. If it returns false, then the wrapped filter(s) will be skipped.
/** * Allows skipping TokenFilters based on the current set of attributes. * * To use, implement the {@link #shouldFilter()} method. If it returns {@code true}, * then calling {@link #incrementToken()} will use the wrapped TokenFilter(s) to * make changes to the tokenstream. If it returns {@code false}, then the wrapped * filter(s) will be skipped. */
public abstract class ConditionalTokenFilter extends TokenFilter { private enum TokenState { READING, PREBUFFERING, DELEGATING } private final class OneTimeWrapper extends TokenStream { private final OffsetAttribute offsetAtt; private final PositionIncrementAttribute posIncAtt; public OneTimeWrapper(AttributeSource attributeSource) { super(attributeSource); this.offsetAtt = attributeSource.addAttribute(OffsetAttribute.class); this.posIncAtt = attributeSource.addAttribute(PositionIncrementAttribute.class); } @Override public boolean incrementToken() throws IOException { if (state == TokenState.PREBUFFERING) { if (posIncAtt.getPositionIncrement() == 0) { adjustPosition = true; posIncAtt.setPositionIncrement(1); } state = TokenState.DELEGATING; return true; } assert state == TokenState.DELEGATING; if (input.incrementToken()) { if (shouldFilter()) { return true; } endOffset = offsetAtt.endOffset(); bufferedState = captureState(); } else { exhausted = true; } return false; } @Override public void reset() throws IOException { // clearing attributes etc is done by the parent stream, // so must be avoided here } @Override public void end() throws IOException { // imitate Tokenizer.end() call - endAttributes, set final offset if (exhausted) { if (endState == null) { input.end(); endState = captureState(); } endOffset = offsetAtt.endOffset(); } endAttributes(); offsetAtt.setOffset(endOffset, endOffset); } } private final TokenStream delegate; private TokenState state = TokenState.READING; private boolean lastTokenFiltered; private State bufferedState = null; private boolean exhausted; private boolean adjustPosition; private State endState = null; private int endOffset; private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
Create a new ConditionalTokenFilter
Params:
  • input – the input TokenStream
  • inputFactory – a factory function to create the wrapped filter(s)
/** * Create a new ConditionalTokenFilter * @param input the input TokenStream * @param inputFactory a factory function to create the wrapped filter(s) */
protected ConditionalTokenFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory) { super(input); this.delegate = inputFactory.apply(new OneTimeWrapper(this.input)); }
Whether or not to execute the wrapped TokenFilter(s) for the current token
/** * Whether or not to execute the wrapped TokenFilter(s) for the current token */
protected abstract boolean shouldFilter() throws IOException; @Override public void reset() throws IOException { super.reset(); this.delegate.reset(); this.state = TokenState.READING; this.lastTokenFiltered = false; this.bufferedState = null; this.exhausted = false; this.adjustPosition = false; this.endOffset = -1; this.endState = null; } @Override public void end() throws IOException { if (endState == null) { super.end(); endState = captureState(); } else { restoreState(endState); } endOffset = getAttribute(OffsetAttribute.class).endOffset(); if (lastTokenFiltered) { this.delegate.end(); endState = captureState(); } } @Override public void close() throws IOException { super.close(); this.delegate.close(); } @Override public final boolean incrementToken() throws IOException { lastTokenFiltered = false; while (true) { if (state == TokenState.READING) { if (bufferedState != null) { restoreState(bufferedState); bufferedState = null; lastTokenFiltered = false; return true; } if (exhausted == true) { return false; } if (input.incrementToken() == false) { exhausted = true; return false; } if (shouldFilter()) { lastTokenFiltered = true; state = TokenState.PREBUFFERING; // we determine that the delegate has emitted all the tokens it can at the current // position when OneTimeWrapper.incrementToken() is called in DELEGATING state. To // signal this back to the delegate, we return false, so we now need to reset it // to ensure that it can continue to emit more tokens delegate.reset(); boolean more = delegate.incrementToken(); if (more) { state = TokenState.DELEGATING; if (adjustPosition) { int posInc = posIncAtt.getPositionIncrement(); posIncAtt.setPositionIncrement(posInc - 1); } adjustPosition = false; } else { state = TokenState.READING; return endDelegating(); } return true; } return true; } if (state == TokenState.DELEGATING) { lastTokenFiltered = true; if (delegate.incrementToken()) { return true; } // no more cached tokens state = TokenState.READING; return endDelegating(); } } } private boolean endDelegating() throws IOException { if (bufferedState == null) { assert exhausted == true; return false; } delegate.end(); int posInc = posIncAtt.getPositionIncrement(); restoreState(bufferedState); // System.out.println("Buffered posInc: " + posIncAtt.getPositionIncrement() + " Delegated posInc: " + posInc); posIncAtt.setPositionIncrement(posIncAtt.getPositionIncrement() + posInc); if (adjustPosition) { posIncAtt.setPositionIncrement(posIncAtt.getPositionIncrement() - 1); adjustPosition = false; } bufferedState = null; return true; } }