/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.index.sasi.analyzer;

import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.cassandra.index.sasi.analyzer.filter.BasicResultFilters;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineBuilder;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineExecutor;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineTask;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.AsciiType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.serializers.MarshalException;
import org.apache.cassandra.utils.ByteBufferUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Analyzer that does *not* tokenize the input. Optionally applies
 * filters to the input/output as defined in the analyzer's options.
 */
public class NonTokenizingAnalyzer extends AbstractAnalyzer { private static final Logger logger = LoggerFactory.getLogger(NonTokenizingAnalyzer.class); private static final Set<AbstractType<?>> VALID_ANALYZABLE_TYPES = new HashSet<AbstractType<?>>() {{ add(UTF8Type.instance); add(AsciiType.instance); }}; private AbstractType validator; private NonTokenizingOptions options; private FilterPipelineTask filterPipeline; private ByteBuffer input; private boolean hasNext = false; public void init(Map<String, String> options, AbstractType validator) { init(NonTokenizingOptions.buildFromMap(options), validator); } public void init(NonTokenizingOptions tokenizerOptions, AbstractType validator) { this.validator = validator; this.options = tokenizerOptions; this.filterPipeline = getFilterPipeline(); } public boolean hasNext() { // check that we know how to handle the input, otherwise bail if (!VALID_ANALYZABLE_TYPES.contains(validator)) return false; if (hasNext) { String inputStr; try { inputStr = validator.getString(input); if (inputStr == null) throw new MarshalException(String.format("'null' deserialized value for %s with %s", ByteBufferUtil.bytesToHex(input), validator)); Object pipelineRes = FilterPipelineExecutor.execute(filterPipeline, inputStr); if (pipelineRes == null) return false; next = validator.fromString(normalize((String) pipelineRes)); return true; } catch (MarshalException e) { logger.error("Failed to deserialize value with " + validator, e); return false; } finally { hasNext = false; } } return false; } public void reset(ByteBuffer input) { this.next = null; this.input = input; this.hasNext = true; } private FilterPipelineTask getFilterPipeline() { FilterPipelineBuilder builder = new FilterPipelineBuilder(new BasicResultFilters.NoOperation()); if (options.isCaseSensitive() && options.shouldLowerCaseOutput()) builder = builder.add("to_lower", new BasicResultFilters.LowerCase()); if (options.isCaseSensitive() && options.shouldUpperCaseOutput()) builder = 
builder.add("to_upper", new BasicResultFilters.UpperCase()); if (!options.isCaseSensitive()) builder = builder.add("to_lower", new BasicResultFilters.LowerCase()); return builder.build(); } @Override public boolean isCompatibleWith(AbstractType<?> validator) { return VALID_ANALYZABLE_TYPES.contains(validator); } }