// Source: org.apache.lucene/lucene-core/8.2.0 : org/apache/lucene/index/ParallelLeafReader.java
// Class: ParallelLeafReader
// Project: http://lucene.apache.org/lucene-parent/lucene-core: Apache Lucene Java Core (The Apache Software Foundation)
// License: Apache 2
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

/** A {@link LeafReader} which reads multiple, parallel indexes.  Each index
 * added must have the same number of documents, but typically each contains
 * different fields. Deletions are taken from the first reader.
 * Each document contains the union of the fields of all documents
 * with the same document number.  When searching, matches for a
 * query term are from the first index added that has the field.
 *
 * <p>This is useful, e.g., with collections that have large fields which
 * change rarely and small fields that change more frequently.  The smaller
 * fields may be re-indexed in a new index and both indexes may be searched
 * together.
 *
 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
 * are created and modified the same way. For example, if you add
 * documents to one index, you need to add the same documents in the
 * same order to the other indexes. <em>Failure to do so will result in
 * undefined behavior</em>.
 */
public class ParallelLeafReader extends LeafReader {
  private final FieldInfos fieldInfos;
  private final LeafReader[] parallelReaders, storedFieldsReaders;
  private final Set<LeafReader> completeReaderSet =
    Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
  private final boolean closeSubReaders;
  private final int maxDoc, numDocs;
  private final boolean hasDeletions;
  private final LeafMetaData metaData;
  private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
  private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();//TODO needn't sort?
  private final Map<String,LeafReader> termsFieldToReader = new HashMap<>();

  /** Create a ParallelLeafReader based on the provided
   *  readers; auto-closes the given readers on {@link #close()}. */
  public ParallelLeafReader(LeafReader... readers) throws IOException {
    // closeSubReaders = true: doClose() will close the given readers rather than just decRef() them.
    this(true, readers);
  }

  /** Create a ParallelLeafReader based on the provided
   *  readers. */
  public ParallelLeafReader(boolean closeSubReaders, LeafReader... readers) throws IOException {
    // No separate stored-fields readers were supplied, so the same array is used for both roles.
    this(closeSubReaders, readers, readers);
  }

  /** Expert: create a ParallelLeafReader based on the provided
   *  readers and storedFieldsReaders; when a document is
   *  loaded, only storedFieldsReaders will be used. */
  public ParallelLeafReader(boolean closeSubReaders, LeafReader[] readers, LeafReader[] storedFieldsReaders) throws IOException {
    this.closeSubReaders = closeSubReaders;
    // Stored-fields readers without a main reader are rejected: doc counts and deletions below come from readers[0].
    if (readers.length == 0 && storedFieldsReaders.length > 0)
      throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
    // Keep private copies so later changes to the caller's arrays cannot affect this reader.
    this.parallelReaders = readers.clone();
    this.storedFieldsReaders = storedFieldsReaders.clone();
    if (parallelReaders.length > 0) {
      // The first reader defines the document counts and whether deletions exist.
      final LeafReader first = parallelReaders[0];
      this.maxDoc = first.maxDoc();
      this.numDocs = first.numDocs();
      this.hasDeletions = first.hasDeletions();
    } else {
      // No readers at all: behave as an empty index.
      this.maxDoc = this.numDocs = 0;
      this.hasDeletions = false;
    }
    // completeReaderSet is identity-based, so a reader present in both arrays is only recorded once.
    Collections.addAll(completeReaderSet, this.parallelReaders);
    Collections.addAll(completeReaderSet, this.storedFieldsReaders);
    
    // check compatibility:
    for(LeafReader reader : completeReaderSet) {
      if (reader.maxDoc() != maxDoc) {
        throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
      }
    }
    // Use the soft-deletes field name of any reader that declares one; null if none does.
    final String softDeletesField = completeReaderSet.stream()
        .map(r -> r.getFieldInfos().getSoftDeletesField())
        .filter(Objects::nonNull).findAny().orElse(null);
    // TODO: make this read-only in a cleaner way?
    FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField));

    // Both stay at their "unset" value until the first parallel reader has been inspected.
    Sort indexSort = null;
    int createdVersionMajor = -1;

    // build FieldInfos and fieldToReader map:
    for (final LeafReader reader : this.parallelReaders) {
      LeafMetaData leafMetaData = reader.getMetaData();
      
      // Readers may be unsorted (null sort), but any two non-null sorts must be equal.
      Sort leafIndexSort = leafMetaData.getSort();
      if (indexSort == null) {
        indexSort = leafIndexSort;
      } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) {
        throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort);
      }

      // All readers must report the same created-version major.
      if (createdVersionMajor == -1) {
        createdVersionMajor = leafMetaData.getCreatedVersionMajor();
      } else if (createdVersionMajor != leafMetaData.getCreatedVersionMajor()) {
        throw new IllegalArgumentException("cannot combine LeafReaders that have different creation versions: saw both version=" + createdVersionMajor + " and " + leafMetaData.getCreatedVersionMajor());
      }

      final FieldInfos readerFieldInfos = reader.getFieldInfos();
      for (FieldInfo fieldInfo : readerFieldInfos) {
        // NOTE: first reader having a given field "wins":
        if (!fieldToReader.containsKey(fieldInfo.name)) {
          builder.add(fieldInfo, fieldInfo.getDocValuesGen());
          fieldToReader.put(fieldInfo.name, reader);
          // only add these if the reader responsible for that field name is the current:
          // TODO consider populating 1st leaf with vectors even if the field name has been seen on a previous leaf
          if (fieldInfo.hasVectors()) {
            tvFieldToReader.put(fieldInfo.name, reader);
          }
          // TODO consider populating 1st leaf with terms even if the field name has been seen on a previous leaf
          if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
            termsFieldToReader.put(fieldInfo.name, reader);
          }
        }
      }
    }
    if (createdVersionMajor == -1) {
      // empty reader
      createdVersionMajor = Version.LATEST.major;
    }

    // Keep the earliest minVersion among the parallel readers; a reader reporting null makes the combined value null.
    Version minVersion = Version.LATEST;
    for (final LeafReader reader : this.parallelReaders) {
      Version leafVersion = reader.getMetaData().getMinVersion();
      if (leafVersion == null) {
        minVersion = null;
        break;
      } else if (minVersion.onOrAfter(leafVersion)) {
        minVersion = leafVersion;
      }
    }

    fieldInfos = builder.finish();
    this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort);

    // do this finally so any Exceptions occurred before don't affect refcounts:
    for (LeafReader reader : completeReaderSet) {
      // When the caller keeps ownership, take our own reference; doClose() releases it via decRef().
      if (!closeSubReaders) {
        reader.incRef();
      }
      reader.registerParentReader(this);
    }
  }

  /** Renders as {@code ParallelLeafReader(r1, r2, ...)}, listing every distinct sub-reader. */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("ParallelLeafReader(");
    String separator = "";
    for (final LeafReader reader : completeReaderSet) {
      sb.append(separator).append(reader);
      // Every element after the first is preceded by a comma.
      separator = ", ";
    }
    sb.append(')');
    return sb.toString();
  }

  // Collects the term vectors of one document, keyed by field name; getTermVectors() creates a fresh instance per call
  private static final class ParallelFields extends Fields {
    /** Terms per field name, kept sorted by field name. */
    final Map<String,Terms> fields = new TreeMap<>();

    ParallelFields() {
    }

    /** Stores {@code terms} under {@code fieldName}, replacing any earlier entry for that name. */
    void addField(String fieldName, Terms terms) {
      fields.put(fieldName, terms);
    }

    /** Iterates over the stored field names through a read-only view of the key set. */
    @Override
    public Iterator<String> iterator() {
      final Set<String> names = Collections.unmodifiableSet(fields.keySet());
      return names.iterator();
    }

    /** Returns the terms stored for {@code field}, or null if nothing was added under that name. */
    @Override
    public Terms terms(String field) {
      return fields.get(field);
    }

    /** Number of fields added so far. */
    @Override
    public int size() {
      return fields.size();
    }
  }
  
  /**
   * {@inheritDoc}
   * <p>
   * NOTE: the returned field numbers will likely not
   * correspond to the actual field numbers in the underlying
   * readers, and codec metadata ({@link FieldInfo#getAttribute(String)})
   * will be unavailable.
   */
  @Override
  public FieldInfos getFieldInfos() {
    // Built once in the constructor from the parallel readers' field infos (first reader declaring a field wins).
    return fieldInfos;
  }
  
  /** Returns the live docs of the first parallel reader, or null when it reported no deletions at construction. */
  @Override
  public Bits getLiveDocs() {
    ensureOpen();
    if (!hasDeletions) {
      return null;
    }
    // Deletions are always taken from the first reader.
    return parallelReaders[0].getLiveDocs();
  }

  /** Returns the terms of {@code field} from the reader registered for it, or null if no reader indexes that field. */
  @Override
  public Terms terms(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = termsFieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.terms(field);
  }

  @Override
  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    // The value was captured from the first parallel reader in the constructor (0 if there are no readers).
    return numDocs;
  }
  
  @Override
  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    // The value was captured from the first parallel reader in the constructor; all sub-readers were checked against it.
    return maxDoc;
  }
  
  /** Feeds the stored fields of {@code docID} from each stored-fields reader, in array order, to {@code visitor}. */
  @Override
  public void document(int docID, StoredFieldVisitor visitor) throws IOException {
    ensureOpen();
    for (int i = 0; i < storedFieldsReaders.length; i++) {
      storedFieldsReaders[i].document(docID, visitor);
    }
  }
  
  /**
   * Delegates to the wrapped reader only when this instance wraps exactly one reader that
   * serves as both the main and the stored-fields reader; otherwise returns null.
   */
  @Override
  public CacheHelper getCoreCacheHelper() {
    // ParallelReader instances can be short-lived, which would make caching trappy,
    // so caching is only offered in the single-reader case.
    if (parallelReaders.length != 1 || storedFieldsReaders.length != 1) {
      return null;
    }
    final LeafReader only = parallelReaders[0];
    if (only != storedFieldsReaders[0]) {
      return null;
    }
    return only.getCoreCacheHelper();
  }

  /**
   * Delegates to the wrapped reader only when this instance wraps exactly one reader that
   * serves as both the main and the stored-fields reader; otherwise returns null.
   */
  @Override
  public CacheHelper getReaderCacheHelper() {
    // ParallelReader instances can be short-lived, which would make caching trappy,
    // so caching is only offered in the single-reader case.
    if (parallelReaders.length != 1 || storedFieldsReaders.length != 1) {
      return null;
    }
    final LeafReader only = parallelReaders[0];
    if (only != storedFieldsReaders[0]) {
      return null;
    }
    return only.getReaderCacheHelper();
  }

  /**
   * Gathers the term vectors of {@code docID} from every reader registered for a vector field.
   * Returns null when no field yields a vector for this document.
   */
  @Override
  public Fields getTermVectors(int docID) throws IOException {
    ensureOpen();
    ParallelFields collected = null;
    for (Map.Entry<String,LeafReader> entry : tvFieldToReader.entrySet()) {
      final String name = entry.getKey();
      final Terms termVector = entry.getValue().getTermVector(docID, name);
      if (termVector == null) {
        continue;
      }
      // Allocate lazily so a document without any vectors yields null.
      if (collected == null) {
        collected = new ParallelFields();
      }
      collected.addField(name, termVector);
    }
    return collected;
  }
  
  /**
   * Releases every distinct sub-reader: closes it when {@code closeSubReaders} is set,
   * otherwise drops the reference taken in the constructor.
   *
   * <p>All readers are processed even if some of them fail. The first {@link IOException}
   * is rethrown once the loop has finished; later ones are attached to it as suppressed
   * exceptions instead of being lost.
   *
   * @throws IOException the first failure raised while releasing a sub-reader
   */
  @Override
  protected synchronized void doClose() throws IOException {
    IOException first = null;
    for (LeafReader reader : completeReaderSet) {
      try {
        if (closeSubReaders) {
          reader.close();
        } else {
          reader.decRef();
        }
      } catch (IOException e) {
        if (first == null) {
          first = e;
        } else if (e != first) {
          // addSuppressed rejects self-suppression, hence the identity check.
          first.addSuppressed(e);
        }
      }
    }
    // throw the first exception
    if (first != null) {
      throw first;
    }
  }

  /** Returns the numeric doc values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public NumericDocValues getNumericDocValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getNumericDocValues(field);
  }
  
  /** Returns the binary doc values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getBinaryDocValues(field);
  }

  /** Returns the sorted doc values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getSortedDocValues(field);
  }
  
  /** Returns the sorted-numeric doc values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getSortedNumericDocValues(field);
  }

  /** Returns the sorted-set doc values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getSortedSetDocValues(field);
  }

  /** Returns the norm values of {@code field} from the reader that declared it, or null for an unknown field. */
  @Override
  public NumericDocValues getNormValues(String field) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(field);
    if (owner == null) {
      return null;
    }
    return owner.getNormValues(field);
  }

  /** Returns the point values of {@code fieldName} from the reader that declared it, or null for an unknown field. */
  @Override
  public PointValues getPointValues(String fieldName) throws IOException {
    ensureOpen();
    final LeafReader owner = fieldToReader.get(fieldName);
    if (owner == null) {
      return null;
    }
    return owner.getPointValues(fieldName);
  }

  /** Runs {@code checkIntegrity()} on every distinct sub-reader; the first failure propagates. */
  @Override
  public void checkIntegrity() throws IOException {
    ensureOpen();
    final Iterator<LeafReader> it = completeReaderSet.iterator();
    while (it.hasNext()) {
      it.next().checkIntegrity();
    }
  }

  Returns the LeafReaders that were passed on init. /** Returns the {@link LeafReader}s that were passed on init. */
  public LeafReader[] getParallelReaders() {
    ensureOpen();
    return parallelReaders;
  }

  @Override
  public LeafMetaData getMetaData() {
    // Assembled in the constructor from the parallel readers' created-version major, min version and index sort.
    return metaData;
  }

}
// org.apache.lucene/lucene-core/8.2.0/org/apache/lucene/index/ParallelLeafReader.java