package org.apache.lucene.codecs.uniformsplit;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.NAME;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.TERMS_BLOCKS_EXTENSION;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.TERMS_DICTIONARY_EXTENSION;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.VERSION_ENCODABLE_FIELDS_METADATA;
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.VERSION_START;
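
/**
 * A block-based terms index and dictionary reader based on the Uniform Split technique: this
 * {@link FieldsProducer} reads the terms blocks and the terms dictionary written by the
 * corresponding {@code UniformSplitTermsWriter}, and exposes one {@link UniformSplitTerms}
 * per field.
 *
 * <p>A typical construction, as a {@code PostingsFormat#fieldsProducer(SegmentReadState)}
 * implementation might do it (a sketch only; the concrete postings reader is an assumption and
 * depends on the codec version, e.g. {@code Lucene84PostingsReader} in the 8.x codebase):
 *
 * <pre>
 * PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
 * boolean success = false;
 * try {
 *   FieldsProducer termsReader =
 *       new UniformSplitTermsReader(postingsReader, state, null, false);
 *   success = true;
 *   return termsReader;
 * } finally {
 *   if (!success) {
 *     IOUtils.closeWhileHandlingException(postingsReader);
 *   }
 * }
 * </pre>
 *
 * @lucene.experimental
 */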
public class UniformSplitTermsReader extends FieldsProducer {
  // Shallow size of this instance plus its two IndexInput references (terms blocks and dictionary).
  private static final long BASE_RAM_USAGE =
      RamUsageEstimator.shallowSizeOfInstance(UniformSplitTermsReader.class)
          + RamUsageEstimator.shallowSizeOfInstance(IndexInput.class) * 2;
protected final PostingsReaderBase postingsReader;
protected final int version;
protected final IndexInput blockInput;
protected final IndexInput dictionaryInput;
  protected final Map<String, UniformSplitTerms> fieldToTermsMap;
  // Field names sorted and unmodifiable, backing the iterator() contract of FieldsProducer.
  protected final Collection<String> sortedFieldNames;
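
  /**
   * @param blockDecoder Optional block decoder, may be null if none. It can be used for
   *     decompression or decryption.
   * @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it
   *     is kept off-heap. If block encoding/decoding is used, then the dictionary is always
   *     loaded on-heap whatever this parameter value is.
   */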
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) throws IOException {
this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
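
  /**
   * Expert constructor for subclasses that customize the codec name, the supported version range,
   * the file extensions, or the {@link FieldMetadata} serialization.
   */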
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
IndexInput dictionaryInput = null;
IndexInput blockInput = null;
boolean success = false;
try {
this.postingsReader = postingsReader;
String segmentName = state.segmentInfo.name;
String termsName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, termsBlocksExtension);
blockInput = state.directory.openInput(termsName, state.context);
version = CodecUtil.checkIndexHeader(blockInput, codecName, versionStart,
versionCurrent, state.segmentInfo.getId(), state.segmentSuffix);
String indexName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, dictionaryExtension);
dictionaryInput = state.directory.openInput(indexName, state.context);
CodecUtil.checkIndexHeader(dictionaryInput, codecName, version, version, state.segmentInfo.getId(), state.segmentSuffix);
      // The dictionary file is entirely verified at open time; checkIntegrity()
      // therefore only re-checks the terms blocks file.
      CodecUtil.checksumEntireFile(dictionaryInput);
      postingsReader.init(blockInput, state);
      // Reading the checksum footer validates the file structure without paying
      // the cost of checksumming the whole blocks file at open time.
      CodecUtil.retrieveChecksum(blockInput);
      // The fields metadata is located at the end of the blocks file.
      seekFieldsMetadata(blockInput);
Collection<FieldMetadata> fieldMetadataCollection =
readFieldsMetadata(blockInput, blockDecoder, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
fieldToTermsMap = new HashMap<>();
this.blockInput = blockInput;
this.dictionaryInput = dictionaryInput;
fillFieldMap(postingsReader, state, blockDecoder, dictionaryOnHeap, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet());
Collections.sort(fieldNames);
sortedFieldNames = Collections.unmodifiableList(fieldNames);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(blockInput, dictionaryInput);
}
}
}
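
  /**
   * Builds one {@link UniformSplitTerms} per field described in the fields metadata and registers
   * it in {@link #fieldToTermsMap}.
   */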
protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, fieldMetadata, blockDecoder, dictionaryOnHeap);
fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
new UniformSplitTerms(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier));
}
}
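
  /**
   * Creates the supplier of dictionary browsers for a field; the default implementation reads an
   * FST-based dictionary starting at {@link FieldMetadata#getDictionaryStartFP()}.
   */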
protected IndexDictionary.BrowserSupplier createDictionaryBrowserSupplier(SegmentReadState state, IndexInput dictionaryInput, FieldMetadata fieldMetadata,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) throws IOException {
return new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder, dictionaryOnHeap);
}
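
  /**
   * Reads the fields metadata located at the end of the terms blocks file. The metadata may be
   * encoded (e.g. compressed) if a {@link BlockDecoder} is provided and the file version supports
   * encodable fields metadata.
   */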
protected Collection<FieldMetadata> readFieldsMetadata(IndexInput indexInput, BlockDecoder blockDecoder, FieldInfos fieldInfos,
FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
int numFields = indexInput.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
}
    return (blockDecoder != null && version >= VERSION_ENCODABLE_FIELDS_METADATA)
        ? readEncodedFieldsMetadata(numFields, indexInput, blockDecoder, fieldInfos, fieldMetadataReader, maxNumDocs)
        : readUnencodedFieldsMetadata(numFields, indexInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}
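
  /**
   * Reads the encoded length, decodes the metadata bytes with the {@link BlockDecoder}, then reads
   * the fields metadata from the decoded bytes.
   */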
protected Collection<FieldMetadata> readEncodedFieldsMetadata(int numFields, DataInput metadataInput, BlockDecoder blockDecoder,
FieldInfos fieldInfos, FieldMetadata.Serializer fieldMetadataReader,
int maxNumDocs) throws IOException {
long encodedLength = metadataInput.readVLong();
if (encodedLength < 0) {
throw new CorruptIndexException("Illegal encoded length: " + encodedLength, metadataInput);
}
BytesRef decodedBytes = blockDecoder.decode(metadataInput, encodedLength);
DataInput decodedMetadataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
return readUnencodedFieldsMetadata(numFields, decodedMetadataInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}
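
  /** Reads the fields metadata directly from the input, one {@link FieldMetadata} per field. */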
protected Collection<FieldMetadata> readUnencodedFieldsMetadata(int numFields, DataInput metadataInput, FieldInfos fieldInfos,
FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
for (int i = 0; i < numFields; i++) {
fieldMetadataCollection.add(fieldMetadataReader.read(metadataInput, fieldInfos, maxNumDocs));
}
return fieldMetadataCollection;
}
@Override
public void close() throws IOException {
try {
IOUtils.close(blockInput, dictionaryInput, postingsReader);
    } finally {
      // Clear the map so that the per-field terms can be GC'ed even if the
      // application keeps a reference to this closed reader.
      fieldToTermsMap.clear();
    }
}
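
  /**
   * Verifies the checksum of the terms blocks file and delegates to the postings reader. The
   * dictionary file is not re-verified here: it is entirely checksummed when this reader is
   * opened.
   */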
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(blockInput);
postingsReader.checkIntegrity();
}
@Override
public Iterator<String> iterator() {
return sortedFieldNames.iterator();
}
@Override
public Terms terms(String field) {
return fieldToTermsMap.get(field);
}
@Override
public int size() {
return fieldToTermsMap.size();
}
@Override
public long ramBytesUsed() {
long ramUsage = BASE_RAM_USAGE;
ramUsage += postingsReader.ramBytesUsed();
ramUsage += RamUsageUtil.ramBytesUsedByHashMapOfSize(fieldToTermsMap.size());
ramUsage += getTermsRamBytesUsed();
ramUsage += RamUsageUtil.ramBytesUsedByUnmodifiableArrayListOfSize(sortedFieldNames.size());
return ramUsage;
}
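
  /** Accumulates the RAM usage of all the {@link UniformSplitTerms} held in {@link #fieldToTermsMap}. */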
protected long getTermsRamBytesUsed() {
long ramUsage = 0L;
for (UniformSplitTerms terms : fieldToTermsMap.values()) {
ramUsage += terms.ramBytesUsed();
}
return ramUsage;
}
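
  /**
   * Positions the given {@link IndexInput} at the beginning of the fields metadata, whose start
   * file pointer is recorded just before the codec footer.
   */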
  protected void seekFieldsMetadata(IndexInput indexInput) throws IOException {
    // The fields metadata start FP is stored as a fixed-length long just before the codec footer.
    indexInput.seek(indexInput.length() - CodecUtil.footerLength() - Long.BYTES);
    indexInput.seek(indexInput.readLong());
  }
}