package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
 * Base class for analysis factories that are configured from a
 * {@code Map<String,String>} of arguments.
 *
 * <p>The {@code get*} and {@code require*} helpers <em>consume</em> the argument they
 * read: the entry is removed from the supplied map. A snapshot of the arguments as they
 * were passed to the constructor stays available through {@link #getOriginalArgs()}.
 *
 * <p>The class also offers helpers for loading word lists through a
 * {@code ResourceLoader} and for splitting separator-delimited lists.
 */
public abstract class AbstractAnalysisFactory {

  /** Name of the argument that carries the Lucene match version. */
  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";

  /** Name of the argument that carries the factory class name. */
  private static final String CLASS_NAME = "class";

  /** Matches one item of a list: a maximal run of characters that are neither comma nor whitespace. */
  private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");

  /** Unmodifiable copy of the arguments exactly as they were handed to the constructor. */
  private final Map<String,String> originalArgs;

  /** The match version parsed from the arguments, or {@code Version.LATEST} if none was given. */
  protected final Version luceneMatchVersion;

  /** Flag maintained only through its getter/setter; this class never sets it itself. */
  private boolean isExplicitLuceneMatchVersion = false;

  /**
   * Initializes the factory from {@code args}.
   *
   * <p>A copy of {@code args} is retained as the original arguments. The
   * {@link #LUCENE_MATCH_VERSION_PARAM} entry and the {@code "class"} entry are then
   * removed from {@code args} (the caller's map is modified).
   *
   * @param args mutable argument map; must not be {@code null}
   * @throws IllegalArgumentException if the match version value cannot be parsed
   */
  protected AbstractAnalysisFactory(Map<String,String> args) {
    originalArgs = Collections.unmodifiableMap(new HashMap<>(args));
    String version = get(args, LUCENE_MATCH_VERSION_PARAM);
    if (version == null) {
      luceneMatchVersion = Version.LATEST;
    } else {
      try {
        luceneMatchVersion = Version.parseLeniently(version);
      } catch (ParseException pe) {
        throw new IllegalArgumentException(pe);
      }
    }
    args.remove(CLASS_NAME);  // consume the class name
  }

  /** Returns the unmodifiable snapshot of the arguments taken at construction time. */
  public final Map<String,String> getOriginalArgs() {
    return originalArgs;
  }

  /** Returns the match version this factory was configured with. */
  public final Version getLuceneMatchVersion() {
    return this.luceneMatchVersion;
  }

  /**
   * Removes and returns the value of a mandatory argument.
   *
   * @param args argument map; the entry for {@code name} is removed
   * @param name argument name
   * @return the argument value, never {@code null}
   * @throws IllegalArgumentException if the argument is absent
   */
  public String require(Map<String,String> args, String name) {
    String s = args.remove(name);
    if (s == null) {
      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
    }
    return s;
  }

  /**
   * Case-sensitive variant of
   * {@link #require(Map, String, Collection, boolean)}.
   */
  public String require(Map<String,String> args, String name, Collection<String> allowedValues) {
    return require(args, name, allowedValues, true);
  }

  /**
   * Removes and returns the value of a mandatory argument, checking it against a set of
   * permitted values.
   *
   * @param args argument map; the entry for {@code name} is removed
   * @param name argument name
   * @param allowedValues values the argument may take
   * @param caseSensitive whether the comparison against {@code allowedValues} is case-sensitive
   * @return the argument value as supplied (not normalized to the matching allowed value)
   * @throws IllegalArgumentException if the argument is absent or not one of {@code allowedValues}
   */
  public String require(Map<String,String> args, String name, Collection<String> allowedValues, boolean caseSensitive) {
    String s = args.remove(name);
    if (s == null) {
      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
    }
    return checkAllowed(name, s, allowedValues, caseSensitive);
  }

  /**
   * Removes and returns the value of an optional argument.
   *
   * @return the argument value, or {@code null} if absent
   */
  public String get(Map<String,String> args, String name) {
    return args.remove(name);
  }

  /**
   * Removes and returns the value of an optional argument.
   *
   * @return the argument value, or {@code defaultVal} if absent
   */
  public String get(Map<String,String> args, String name, String defaultVal) {
    String s = args.remove(name);
    return s == null ? defaultVal : s;
  }

  /**
   * Variant of {@link #get(Map, String, Collection, String, boolean)} with a
   * {@code null} default and case-sensitive matching.
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues) {
    return get(args, name, allowedValues, null);
  }

  /**
   * Variant of {@link #get(Map, String, Collection, String, boolean)} with
   * case-sensitive matching.
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal) {
    return get(args, name, allowedValues, defaultVal, true);
  }

  /**
   * Removes and returns the value of an optional argument, checking it against a set of
   * permitted values.
   *
   * @param args argument map; the entry for {@code name} is removed
   * @param name argument name
   * @param allowedValues values the argument may take
   * @param defaultVal value returned when the argument is absent; it is not validated
   * @param caseSensitive whether the comparison against {@code allowedValues} is case-sensitive
   * @return the argument value as supplied, or {@code defaultVal} if absent
   * @throws IllegalArgumentException if the argument is present but not one of {@code allowedValues}
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal, boolean caseSensitive) {
    String s = args.remove(name);
    if (s == null) {
      return defaultVal;
    }
    return checkAllowed(name, s, allowedValues, caseSensitive);
  }

  /** Returns {@code value} if it matches one of {@code allowedValues}, otherwise throws. */
  private static String checkAllowed(String name, String value, Collection<String> allowedValues, boolean caseSensitive) {
    for (String allowedValue : allowedValues) {
      boolean matches = caseSensitive
          ? value.equals(allowedValue)
          : value.equalsIgnoreCase(allowedValue);
      if (matches) {
        return value;
      }
    }
    throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Integer#parseInt(String)}.
   *
   * @throws IllegalArgumentException if the argument is absent
   * @throws NumberFormatException if the value is not a parsable integer
   */
  protected final int requireInt(Map<String,String> args, String name) {
    return Integer.parseInt(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Integer#parseInt(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   * @throws NumberFormatException if the value is present but not a parsable integer
   */
  protected final int getInt(Map<String,String> args, String name, int defaultVal) {
    String s = args.remove(name);
    return s == null ? defaultVal : Integer.parseInt(s);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Boolean#parseBoolean(String)}.
   *
   * @throws IllegalArgumentException if the argument is absent
   */
  protected final boolean requireBoolean(Map<String,String> args, String name) {
    return Boolean.parseBoolean(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Boolean#parseBoolean(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   */
  protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
    String s = args.remove(name);
    return s == null ? defaultVal : Boolean.parseBoolean(s);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Float#parseFloat(String)}.
   *
   * @throws IllegalArgumentException if the argument is absent
   * @throws NumberFormatException if the value is not a parsable float
   */
  protected final float requireFloat(Map<String,String> args, String name) {
    return Float.parseFloat(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Float#parseFloat(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   * @throws NumberFormatException if the value is present but not a parsable float
   */
  protected final float getFloat(Map<String,String> args, String name, float defaultVal) {
    String s = args.remove(name);
    return s == null ? defaultVal : Float.parseFloat(s);
  }

  /**
   * Removes a mandatory argument and returns it as a single character.
   *
   * <p>The value is validated the same way as in {@link #getChar(Map, String, char)}:
   * it must be exactly one character long. An empty value is therefore reported as a
   * configuration problem rather than surfacing as a {@code StringIndexOutOfBoundsException},
   * and a longer value is rejected instead of being silently truncated.
   *
   * @throws IllegalArgumentException if the argument is absent or its length is not 1
   */
  public char requireChar(Map<String,String> args, String name) {
    String s = require(args, name);
    if (s.length() != 1) {
      throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
    }
    return s.charAt(0);
  }

  /**
   * Removes an optional argument and returns it as a single character.
   *
   * @return the character, or {@code defaultValue} if the argument is absent
   * @throws IllegalArgumentException if the value is present but its length is not 1
   */
  public char getChar(Map<String,String> args, String name, char defaultValue) {
    String s = args.remove(name);
    if (s == null) {
      return defaultValue;
    }
    if (s.length() != 1) {
      throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
    }
    return s.charAt(0);
  }

  /**
   * Removes an optional argument and splits it into a set of items separated by commas
   * and/or whitespace.
   *
   * @return the set of items; {@code null} if the argument is absent or contains no items
   */
  public Set<String> getSet(Map<String,String> args, String name) {
    String s = args.remove(name);
    if (s == null) {
      return null;
    }
    Set<String> set = null;  // stays null when the value holds no item at all
    Matcher matcher = ITEM_PATTERN.matcher(s);
    while (matcher.find()) {
      if (set == null) {
        set = new HashSet<>();
      }
      set.add(matcher.group(0));
    }
    return set;
  }

  /**
   * Removes a mandatory argument and compiles it as a regular expression.
   *
   * @throws IllegalArgumentException if the argument is absent or is not a valid pattern
   */
  protected final Pattern getPattern(Map<String,String> args, String name) {
    try {
      return Pattern.compile(require(args, name));
    } catch (PatternSyntaxException e) {
      throw new IllegalArgumentException
        ("Configuration Error: '" + name + "' can not be parsed in " +
         this.getClass().getSimpleName(), e);
    }
  }

  /**
   * Builds a word set from one or more resources.
   *
   * <p>{@code wordFiles} is split with {@link #splitFileNames(String)}; each (trimmed)
   * name is read with {@link #getLines(ResourceLoader, String)} and its lines are added
   * to the result.
   *
   * @param loader used to open each resource
   * @param wordFiles comma-separated resource names, may be {@code null}
   * @param ignoreCase forwarded to the {@code CharArraySet} constructor and to
   *        {@code StopFilter.makeStopSet}
   * @return the combined set, or {@code null} if {@code wordFiles} yields no names
   * @throws IOException if a resource cannot be read
   */
  protected final CharArraySet getWordSet(ResourceLoader loader,
      String wordFiles, boolean ignoreCase) throws IOException {
    List<String> files = splitFileNames(wordFiles);
    CharArraySet words = null;
    if (!files.isEmpty()) {
      // the first argument is a sizing hint, presumably the initial capacity
      words = new CharArraySet(files.size() * 10, ignoreCase);
      for (String file : files) {
        List<String> wlist = getLines(loader, file.trim());
        words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
      }
    }
    return words;
  }

  /**
   * Returns the lines of {@code resource} as produced by {@code WordlistLoader.getLines},
   * reading the resource as UTF-8.
   *
   * @throws IOException if the resource cannot be opened or read
   */
  protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
    return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
  }

  /**
   * Same as {@link #getWordSet(ResourceLoader, String, boolean)}, except that each
   * resource is parsed with {@code WordlistLoader.getSnowballWordSet}.
   *
   * <p>Resources are decoded as UTF-8 with a decoder configured to report (rather than
   * replace) malformed input and unmappable characters.
   *
   * @param loader used to open each resource
   * @param wordFiles comma-separated resource names, may be {@code null}
   * @param ignoreCase forwarded to the {@code CharArraySet} constructor
   * @return the combined set, or {@code null} if {@code wordFiles} yields no names
   * @throws IOException if a resource cannot be read or decoded
   */
  protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
      String wordFiles, boolean ignoreCase) throws IOException {
    List<String> files = splitFileNames(wordFiles);
    CharArraySet words = null;
    if (!files.isEmpty()) {
      // the first argument is a sizing hint, presumably the initial capacity
      words = new CharArraySet(files.size() * 10, ignoreCase);
      for (String file : files) {
        InputStream stream = null;
        Reader reader = null;
        try {
          stream = loader.openResource(file.trim());
          CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
              .onMalformedInput(CodingErrorAction.REPORT)
              .onUnmappableCharacter(CodingErrorAction.REPORT);
          reader = new InputStreamReader(stream, decoder);
          WordlistLoader.getSnowballWordSet(reader, words);
        } finally {
          IOUtils.closeWhileHandlingException(reader, stream);
        }
      }
    }
    return words;
  }

  /**
   * Splits a comma-separated list of file names; see {@link #splitAt(char, String)}.
   *
   * @param fileNames the list to split, may be {@code null}
   * @return the names; an empty list if {@code fileNames} is {@code null}
   */
  protected final List<String> splitFileNames(String fileNames) {
    return splitAt(',', fileNames);
  }

  /**
   * Splits {@code list} at every occurrence of {@code separator} that is not preceded by
   * a backslash, then removes the backslash from each escaped separator in the items.
   *
   * <p>The separator is matched literally, so characters with a special meaning in
   * regular expressions may be used as separators.
   *
   * @param separator the separator character
   * @param list the text to split, may be {@code null}
   * @return the items; an empty list if {@code list} is {@code null}
   */
  protected final List<String> splitAt(char separator, String list) {
    if (list == null) {
      return Collections.emptyList();
    }
    // Quote the separator so regex metacharacters cannot corrupt the patterns.
    final String literalSeparator = Pattern.quote(String.valueOf(separator));
    final Pattern unescapedSeparator = Pattern.compile("(?<!\\\\)" + literalSeparator);
    final Pattern escapeBeforeSeparator = Pattern.compile("\\\\(?=" + literalSeparator + ")");

    List<String> result = new ArrayList<>();
    for (String item : unescapedSeparator.split(list)) {
      result.add(escapeBeforeSeparator.matcher(item).replaceAll(""));
    }
    return result;
  }

  /**
   * Returns the value of the {@code "class"} entry of the original arguments, or this
   * object's runtime class name if there was no such entry.
   */
  public String getClassArg() {
    // originalArgs is final and always assigned in the constructor, so it is never null here
    String className = originalArgs.get(CLASS_NAME);
    if (null != className) {
      return className;
    }
    return getClass().getName();
  }

  /** Returns the flag last stored by {@link #setExplicitLuceneMatchVersion(boolean)}; {@code false} initially. */
  public boolean isExplicitLuceneMatchVersion() {
    return isExplicitLuceneMatchVersion;
  }

  /** Stores the flag returned by {@link #isExplicitLuceneMatchVersion()}. */
  public void setExplicitLuceneMatchVersion(boolean isExplicitLuceneMatchVersion) {
    this.isExplicitLuceneMatchVersion = isExplicitLuceneMatchVersion;
  }
}