/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

import static org.apache.lucene.index.IndexWriter.isCongruentSort;

/**
 * Holds common state used during segment merging.
 *
 * @lucene.experimental
 */
public class MergeState {
Maps document IDs from old segments to document IDs in the new segment
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps; // Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running: final DocMap[] leafDocMaps;
SegmentInfo of the newly merged segment.
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
FieldInfos of the newly merged segment.
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos mergeFieldInfos;
Stored field producers being merged
/** Stored field producers being merged */
public final StoredFieldsReader[] storedFieldsReaders;
Term vector producers being merged
/** Term vector producers being merged */
public final TermVectorsReader[] termVectorsReaders;
Norms producers being merged
/** Norms producers being merged */
public final NormsProducer[] normsProducers;
DocValues producers being merged
/** DocValues producers being merged */
public final DocValuesProducer[] docValuesProducers;
FieldInfos being merged
/** FieldInfos being merged */
public final FieldInfos[] fieldInfos;
Live docs for each reader
/** Live docs for each reader */
public final Bits[] liveDocs;
Postings to merge
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
Point readers to merge
/** Point readers to merge */
public final PointsReader[] pointsReaders;
Max docs per reader
/** Max docs per reader */
public final int[] maxDocs;
InfoStream for debugging messages.
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
Indicates if the index needs to be sorted
/** Indicates if the index needs to be sorted **/
public boolean needsIndexSort;
Sole constructor.
/** Sole constructor. */
MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException { this.infoStream = infoStream; final Sort indexSort = segmentInfo.getIndexSort(); int numReaders = originalReaders.size(); leafDocMaps = new DocMap[numReaders]; List<CodecReader> readers = maybeSortReaders(originalReaders, segmentInfo); maxDocs = new int[numReaders]; fieldsProducers = new FieldsProducer[numReaders]; normsProducers = new NormsProducer[numReaders]; storedFieldsReaders = new StoredFieldsReader[numReaders]; termVectorsReaders = new TermVectorsReader[numReaders]; docValuesProducers = new DocValuesProducer[numReaders]; pointsReaders = new PointsReader[numReaders]; fieldInfos = new FieldInfos[numReaders]; liveDocs = new Bits[numReaders]; int numDocs = 0; for(int i=0;i<numReaders;i++) { final CodecReader reader = readers.get(i); maxDocs[i] = reader.maxDoc(); liveDocs[i] = reader.getLiveDocs(); fieldInfos[i] = reader.getFieldInfos(); normsProducers[i] = reader.getNormsReader(); if (normsProducers[i] != null) { normsProducers[i] = normsProducers[i].getMergeInstance(); } docValuesProducers[i] = reader.getDocValuesReader(); if (docValuesProducers[i] != null) { docValuesProducers[i] = docValuesProducers[i].getMergeInstance(); } storedFieldsReaders[i] = reader.getFieldsReader(); if (storedFieldsReaders[i] != null) { storedFieldsReaders[i] = storedFieldsReaders[i].getMergeInstance(); } termVectorsReaders[i] = reader.getTermVectorsReader(); if (termVectorsReaders[i] != null) { termVectorsReaders[i] = termVectorsReaders[i].getMergeInstance(); } fieldsProducers[i] = reader.getPostingsReader().getMergeInstance(); pointsReaders[i] = reader.getPointsReader(); if (pointsReaders[i] != null) { pointsReaders[i] = pointsReaders[i].getMergeInstance(); } numDocs += reader.numDocs(); } segmentInfo.setMaxDoc(numDocs); this.segmentInfo = segmentInfo; this.docMaps = buildDocMaps(readers, indexSort); } // Remap docIDs around deletions private DocMap[] 
buildDeletionDocMaps(List<CodecReader> readers) { int totalDocs = 0; int numReaders = readers.size(); DocMap[] docMaps = new DocMap[numReaders]; for (int i = 0; i < numReaders; i++) { LeafReader reader = readers.get(i); Bits liveDocs = reader.getLiveDocs(); final PackedLongValues delDocMap; if (liveDocs != null) { delDocMap = removeDeletes(reader.maxDoc(), liveDocs); } else { delDocMap = null; } final int docBase = totalDocs; docMaps[i] = new DocMap() { @Override public int get(int docID) { if (liveDocs == null) { return docBase + docID; } else if (liveDocs.get(docID)) { return docBase + (int) delDocMap.get(docID); } else { return -1; } } }; totalDocs += reader.numDocs(); } return docMaps; } private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException { if (indexSort == null) { // no index sort ... we only must map around deletions, and rebase to the merged segment's docID space return buildDeletionDocMaps(readers); } else { // do a merge sort of the incoming leaves: long t0 = System.nanoTime(); DocMap[] result = MultiSorter.sort(indexSort, readers); if (result == null) { // already sorted so we can switch back to map around deletions return buildDeletionDocMaps(readers); } else { needsIndexSort = true; } long t1 = System.nanoTime(); if (infoStream.isEnabled("SM")) { infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0)); } return result; } } private List<CodecReader> maybeSortReaders(List<CodecReader> originalReaders, SegmentInfo segmentInfo) throws IOException { // Default to identity: for(int i=0;i<originalReaders.size();i++) { leafDocMaps[i] = new DocMap() { @Override public int get(int docID) { return docID; } }; } Sort indexSort = segmentInfo.getIndexSort(); if (indexSort == null) { return originalReaders; } List<CodecReader> readers = new ArrayList<>(originalReaders.size()); for (CodecReader leaf : originalReaders) { Sort segmentSort = leaf.getMetaData().getSort(); if 
(segmentSort == null || isCongruentSort(indexSort, segmentSort) == false) { throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + (segmentSort == null ? "null" : segmentSort)); } readers.add(leaf); } return readers; }
A map of doc IDs.
/** A map of doc IDs. */
public static abstract class DocMap {
Sole constructor
/** Sole constructor */
public DocMap() { }
Return the mapped docID or -1 if the given doc is not mapped.
/** Return the mapped docID or -1 if the given doc is not mapped. */
public abstract int get(int docID); } static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) { final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); int del = 0; for (int i = 0; i < maxDoc; ++i) { docMapBuilder.add(i - del); if (liveDocs.get(i) == false) { ++del; } } return docMapBuilder.build(); } }