/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.DocIdSetIterator; // javadocs
import org.apache.lucene.util.PriorityQueue;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
Utility class to help merging documents from sub-readers according to either simple
concatenated (unsorted) order, or by a specified index-time sort, skipping
deleted documents and remapping non-deleted documents. /** Utility class to help merging documents from sub-readers according to either simple
* concatenated (unsorted) order, or by a specified index-time sort, skipping
* deleted documents and remapping non-deleted documents. */
public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
Represents one sub-reader being merged /** Represents one sub-reader being merged */
public static abstract class Sub {
Mapped doc ID /** Mapped doc ID */
public int mappedDocID;
final MergeState.DocMap docMap;
Sole constructor /** Sole constructor */
public Sub(MergeState.DocMap docMap) {
this.docMap = docMap;
}
Returns the next document ID from this sub reader, and DocIdSetIterator.NO_MORE_DOCS
when done /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
public abstract int nextDoc() throws IOException;
}
Construct this from the provided subs, specifying the maximum sub count /** Construct this from the provided subs, specifying the maximum sub count */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) throws IOException {
if (indexIsSorted && maxCount > 1) {
return new SortedDocIDMerger<>(subs, maxCount);
} else {
return new SequentialDocIDMerger<>(subs);
}
}
Construct this from the provided subs /** Construct this from the provided subs */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) throws IOException {
return of(subs, subs.size(), indexIsSorted);
}
Reuse API, currently only used by postings during merge /** Reuse API, currently only used by postings during merge */
public abstract void reset() throws IOException;
Returns null when done.
NOTE: after the iterator has exhausted you should not call this
method, as it may result in unpredicted behavior. /** Returns null when done.
* <b>NOTE:</b> after the iterator has exhausted you should not call this
* method, as it may result in unpredicted behavior. */
public abstract T next() throws IOException;
private DocIDMerger() {}
private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
private final List<T> subs;
private T current;
private int nextIndex;
private SequentialDocIDMerger(List<T> subs) throws IOException {
this.subs = subs;
reset();
}
@Override
public void reset() throws IOException {
if (subs.size() > 0) {
current = subs.get(0);
nextIndex = 1;
} else {
current = null;
nextIndex = 0;
}
}
@Override
public T next() throws IOException {
while (true) {
int docID = current.nextDoc();
if (docID == NO_MORE_DOCS) {
if (nextIndex == subs.size()) {
current = null;
return null;
}
current = subs.get(nextIndex);
nextIndex++;
continue;
}
int mappedDocID = current.docMap.get(docID);
if (mappedDocID != -1) {
current.mappedDocID = mappedDocID;
return current;
}
}
}
}
private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
private final List<T> subs;
private final PriorityQueue<T> queue;
private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
this.subs = subs;
queue = new PriorityQueue<T>(maxCount) {
@Override
protected boolean lessThan(Sub a, Sub b) {
assert a.mappedDocID != b.mappedDocID;
return a.mappedDocID < b.mappedDocID;
}
};
reset();
}
@Override
public void reset() throws IOException {
// caller may not have fully consumed the queue:
queue.clear();
boolean first = true;
for(T sub : subs) {
if (first) {
// by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue
// so the first call to next() will advance it
sub.mappedDocID = -1;
first = false;
} else {
int mappedDocID;
while (true) {
int docID = sub.nextDoc();
if (docID == NO_MORE_DOCS) {
mappedDocID = NO_MORE_DOCS;
break;
}
mappedDocID = sub.docMap.get(docID);
if (mappedDocID != -1) {
break;
}
}
if (mappedDocID == NO_MORE_DOCS) {
// all docs in this sub were deleted; do not add it to the queue!
continue;
}
sub.mappedDocID = mappedDocID;
}
queue.add(sub);
}
}
@Override
public T next() throws IOException {
T top = queue.top();
while (true) {
int docID = top.nextDoc();
if (docID == NO_MORE_DOCS) {
queue.pop();
top = queue.top();
break;
}
int mappedDocID = top.docMap.get(docID);
if (mappedDocID == -1) {
// doc was deleted
continue;
} else {
top.mappedDocID = mappedDocID;
top = queue.updateTop();
break;
}
}
return top;
}
}
}