/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
Implements the wildcard search query. Supported wildcards are *
, which
matches any character sequence (including the empty one), and ?
,
which matches any single character. '\' is the escape character.
Note this query can be slow, as it
needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
a Wildcard term should not start with the wildcard *
This query uses the MultiTermQuery.CONSTANT_SCORE_REWRITE
rewrite method.
See Also:
/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
* matches any character sequence (including the empty one), and <code>?</code>,
* which matches any single character. '\' is the escape character.
* <p>
* Note this query can be slow, as it
* needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
* a Wildcard term should not start with the wildcard <code>*</code>
*
* <p>This query uses the {@link
* MultiTermQuery#CONSTANT_SCORE_REWRITE}
* rewrite method.
*
* @see AutomatonQuery
*/
public class WildcardQuery extends AutomatonQuery {
String equality with support for wildcards /** String equality with support for wildcards */
public static final char WILDCARD_STRING = '*';
Char equality with support for wildcards /** Char equality with support for wildcards */
public static final char WILDCARD_CHAR = '?';
Escape character /** Escape character */
public static final char WILDCARD_ESCAPE = '\\';
Constructs a query for terms matching term
.
/**
* Constructs a query for terms matching <code>term</code>.
*/
public WildcardQuery(Term term) {
super(term, toAutomaton(term));
}
Constructs a query for terms matching term
.
Params: - maxDeterminizedStates – maximum number of states in the resulting
automata. If the automata would need more than this many states
TooComplextToDeterminizeException is thrown. Higher number require more
space but can process more complex automata.
/**
* Constructs a query for terms matching <code>term</code>.
* @param maxDeterminizedStates maximum number of states in the resulting
* automata. If the automata would need more than this many states
* TooComplextToDeterminizeException is thrown. Higher number require more
* space but can process more complex automata.
*/
public WildcardQuery(Term term, int maxDeterminizedStates) {
super(term, toAutomaton(term), maxDeterminizedStates);
}
Convert Lucene wildcard syntax into an automaton.
@lucene.internal
/**
* Convert Lucene wildcard syntax into an automaton.
* @lucene.internal
*/
@SuppressWarnings("fallthrough")
public static Automaton toAutomaton(Term wildcardquery) {
List<Automaton> automata = new ArrayList<>();
String wildcardText = wildcardquery.text();
for (int i = 0; i < wildcardText.length();) {
final int c = wildcardText.codePointAt(i);
int length = Character.charCount(c);
switch(c) {
case WILDCARD_STRING:
automata.add(Automata.makeAnyString());
break;
case WILDCARD_CHAR:
automata.add(Automata.makeAnyChar());
break;
case WILDCARD_ESCAPE:
// add the next codepoint instead, if it exists
if (i + length < wildcardText.length()) {
final int nextChar = wildcardText.codePointAt(i + length);
length += Character.charCount(nextChar);
automata.add(Automata.makeChar(nextChar));
break;
} // else fallthru, lenient parsing with a trailing \
default:
automata.add(Automata.makeChar(c));
}
i += length;
}
return Operations.concatenate(automata);
}
Returns the pattern term.
/**
* Returns the pattern term.
*/
public Term getTerm() {
return term;
}
Prints a user-readable version of this query. /** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
buffer.append(term.text());
return buffer.toString();
}
}