/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.search.DocIdSetIterator; // javadocs
import org.apache.lucene.util.PriorityQueue;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

Utility class to help merging documents from sub-readers according to either simple concatenated (unsorted) order, or by a specified index-time sort, skipping deleted documents and remapping non-deleted documents.
/** Utility class to help merging documents from sub-readers according to either simple * concatenated (unsorted) order, or by a specified index-time sort, skipping * deleted documents and remapping non-deleted documents. */
public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
Represents one sub-reader being merged
/** Represents one sub-reader being merged */
public static abstract class Sub {
Mapped doc ID
/** Mapped doc ID */
public int mappedDocID; final MergeState.DocMap docMap;
Sole constructor
/** Sole constructor */
public Sub(MergeState.DocMap docMap) { this.docMap = docMap; }
Returns the next document ID from this sub reader, and DocIdSetIterator.NO_MORE_DOCS when done
/** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
public abstract int nextDoc() throws IOException; }
Construct this from the provided subs, specifying the maximum sub count
/** Construct this from the provided subs, specifying the maximum sub count */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) throws IOException { if (indexIsSorted && maxCount > 1) { return new SortedDocIDMerger<>(subs, maxCount); } else { return new SequentialDocIDMerger<>(subs); } }
Construct this from the provided subs
/** Construct this from the provided subs */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) throws IOException { return of(subs, subs.size(), indexIsSorted); }
Reuse API, currently only used by postings during merge
/** Reuse API, currently only used by postings during merge */
public abstract void reset() throws IOException;
Returns null when done. NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted behavior.
/** Returns null when done. * <b>NOTE:</b> after the iterator has exhausted you should not call this * method, as it may result in unpredicted behavior. */
public abstract T next() throws IOException; private DocIDMerger() {} private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> { private final List<T> subs; private T current; private int nextIndex; private SequentialDocIDMerger(List<T> subs) throws IOException { this.subs = subs; reset(); } @Override public void reset() throws IOException { if (subs.size() > 0) { current = subs.get(0); nextIndex = 1; } else { current = null; nextIndex = 0; } } @Override public T next() throws IOException { while (true) { int docID = current.nextDoc(); if (docID == NO_MORE_DOCS) { if (nextIndex == subs.size()) { current = null; return null; } current = subs.get(nextIndex); nextIndex++; continue; } int mappedDocID = current.docMap.get(docID); if (mappedDocID != -1) { current.mappedDocID = mappedDocID; return current; } } } } private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> { private final List<T> subs; private final PriorityQueue<T> queue; private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException { this.subs = subs; queue = new PriorityQueue<T>(maxCount) { @Override protected boolean lessThan(Sub a, Sub b) { assert a.mappedDocID != b.mappedDocID; return a.mappedDocID < b.mappedDocID; } }; reset(); } @Override public void reset() throws IOException { // caller may not have fully consumed the queue: queue.clear(); boolean first = true; for(T sub : subs) { if (first) { // by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue // so the first call to next() will advance it sub.mappedDocID = -1; first = false; } else { int mappedDocID; while (true) { int docID = sub.nextDoc(); if (docID == NO_MORE_DOCS) { mappedDocID = NO_MORE_DOCS; break; } mappedDocID = sub.docMap.get(docID); if (mappedDocID != -1) { break; } } if (mappedDocID == NO_MORE_DOCS) { // all docs in this sub were deleted; do not add it to the queue! continue; } sub.mappedDocID = mappedDocID; } queue.add(sub); } } @Override public T next() throws IOException { T top = queue.top(); while (true) { int docID = top.nextDoc(); if (docID == NO_MORE_DOCS) { queue.pop(); top = queue.top(); break; } int mappedDocID = top.docMap.get(docID); if (mappedDocID == -1) { // doc was deleted continue; } else { top.mappedDocID = mappedDocID; top = queue.updateTop(); break; } } return top; } } }