/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;
/** A {@link CompositeReader} which reads multiple, parallel indexes. Each
* index added must have the same number of documents, and exactly the same
* number of leaves (with equal {@code maxDoc}), but typically each contains
* different fields. Deletions are taken from the first reader. Each document
* contains the union of the fields of all documents with the same document
* number. When searching, matches for a query term are from the first index
* added that has the field.
*
* <p>This is useful, e.g., with collections that have large fields which
* change rarely and small fields that change more frequently. The smaller
* fields may be re-indexed in a new index and both indexes may be searched
* together.
*
* <p><strong>Warning:</strong> It is up to you to make sure all indexes
* are created and modified the same way. For example, if you add
* documents to one index, you need to add the same documents in the
* same order to the other indexes. <em>Failure to do so will result in
* undefined behavior</em>.
* A good strategy to create suitable indexes with {@link IndexWriter} is to use
* {@link LogDocMergePolicy}, as this one does not reorder documents
* during merging (like {@code TieredMergePolicy}) and triggers merges
* by number of documents per segment. If you use different {@link MergePolicy}s
* it might happen that the segment structure of your index is no longer predictable.
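*
* <p>A minimal usage sketch, assuming two parallel indexes already exist in the
* (hypothetical) directories {@code dir1} and {@code dir2}:
* <pre class="prettyprint">
* DirectoryReader large = DirectoryReader.open(dir1);   // rarely-changing fields
* DirectoryReader small = DirectoryReader.open(dir2);   // frequently re-indexed fields
* ParallelCompositeReader parallel = new ParallelCompositeReader(large, small);
* IndexSearcher searcher = new IndexSearcher(parallel);
* // ... run queries as usual ...
* parallel.close(); // also closes large and small (the varargs constructor auto-closes sub-readers)
* </pre>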
*/
public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
private final boolean closeSubReaders;
private final Set<IndexReader> completeReaderSet =
Collections.newSetFromMap(new IdentityHashMap<IndexReader,Boolean>());
private final CacheHelper cacheHelper;
/** Create a ParallelCompositeReader based on the provided
* readers; auto-closes the given readers on {@link #close()}. */
public ParallelCompositeReader(CompositeReader... readers) throws IOException {
this(true, readers);
}
/** Create a ParallelCompositeReader based on the provided
* readers. */
public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) throws IOException {
this(closeSubReaders, readers, readers);
}
/** Expert: create a ParallelCompositeReader based on the provided
* readers and storedFieldReaders; when a document is
* loaded, only storedFieldsReaders will be used. */
public ParallelCompositeReader(boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders) throws IOException {
super(prepareLeafReaders(readers, storedFieldReaders));
this.closeSubReaders = closeSubReaders;
Collections.addAll(completeReaderSet, readers);
Collections.addAll(completeReaderSet, storedFieldReaders);
// update ref-counts (like MultiReader):
if (!closeSubReaders) {
for (final IndexReader reader : completeReaderSet) {
reader.incRef();
}
}
// finally add our own synthetic readers, so we close or decRef them, too (it does not matter what we do)
completeReaderSet.addAll(getSequentialSubReaders());
// ParallelReader instances can be short-lived, which would make caching trappy
// so we do not cache on them, unless they wrap a single reader in which
// case we delegate
if (readers.length == 1 && storedFieldReaders.length == 1 && readers[0] == storedFieldReaders[0]) {
cacheHelper = readers[0].getReaderCacheHelper();
} else {
cacheHelper = null;
}
}
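// Flattens each CompositeReader into its leaves, validates that all readers share the same
// maxDoc and leaf structure, and wraps the parallel leaves into synthetic ParallelLeafReaders.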
private static LeafReader[] prepareLeafReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
if (readers.length == 0) {
if (storedFieldsReaders.length > 0)
throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
return new LeafReader[0];
} else {
final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();
// check compatibility:
final int maxDoc = readers[0].maxDoc(), noLeaves = firstLeaves.size();
final int[] leafMaxDoc = new int[noLeaves];
for (int i = 0; i < noLeaves; i++) {
final LeafReader r = firstLeaves.get(i).reader();
leafMaxDoc[i] = r.maxDoc();
}
validate(readers, maxDoc, leafMaxDoc);
validate(storedFieldsReaders, maxDoc, leafMaxDoc);
// flatten structure of each Composite to just LeafReader[]
// and combine parallel structure with ParallelLeafReaders:
final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
for (int i = 0; i < wrappedLeaves.length; i++) {
final LeafReader[] subs = new LeafReader[readers.length];
for (int j = 0; j < readers.length; j++) {
subs[j] = readers[j].leaves().get(i).reader();
}
final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
for (int j = 0; j < storedFieldsReaders.length; j++) {
storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
}
// We pass true for closeSubs and we prevent touching of subreaders in doClose():
// By this the synthetic throw-away readers used here are completely invisible to ref-counting
wrappedLeaves[i] = new ParallelLeafReader(true, subs, storedSubs) {
@Override
protected void doClose() {}
};
}
return wrappedLeaves;
}
}
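// Checks that every reader has the same total maxDoc and the same per-leaf maxDoc
// structure as the first reader; throws IllegalArgumentException otherwise.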
private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
for (int i = 0; i < readers.length; i++) {
final CompositeReader reader = readers[i];
final List<? extends LeafReaderContext> subs = reader.leaves();
if (reader.maxDoc() != maxDoc) {
throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
}
final int noSubs = subs.size();
if (noSubs != leafMaxDoc.length) {
throw new IllegalArgumentException("All readers must have same number of leaf readers");
}
for (int subIDX = 0; subIDX < noSubs; subIDX++) {
final LeafReader r = subs.get(subIDX).reader();
if (r.maxDoc() != leafMaxDoc[subIDX]) {
throw new IllegalArgumentException("All leaf readers must have same corresponding subReader maxDoc");
}
}
}
}
@Override
public CacheHelper getReaderCacheHelper() {
return cacheHelper;
}
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (final IndexReader reader : completeReaderSet) {
try {
if (closeSubReaders) {
reader.close();
} else {
reader.decRef();
}
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
// throw the first exception
if (ioe != null) throw ioe;
}
}