org.apache.lucene/lucene-core/8.7.0 : org/apache/lucene/index/Sorter.java

Sorter
https://lucene.apache.org/lucene-parent/lucene-core: Apache Lucene Java Core (The Apache Software Foundation)
Apache License, Version 2.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

Sorts documents of a given index by returning a permutation on the document
IDs.
@lucene.experimental /**
 * Sorts documents of a given index by returning a permutation on the document
 * IDs.
 * @lucene.experimental
 */
final class Sorter {
  final Sort sort;
  
  Creates a new Sorter to sort the index with sort /** Creates a new Sorter to sort the index with {@code sort} */
  Sorter(Sort sort) {
    if (sort.needsScores()) {
      throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
    }
    this.sort = sort;
  }

  A permutation of doc IDs. For every document ID between 0 and IndexReader.maxDoc(), oldToNew(newToOld(docID)) must
return docID.
/**
   * A permutation of doc IDs. For every document ID between <tt>0</tt> and
   * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
   * return <code>docID</code>.
   */
  static abstract class DocMap {

    Given a doc ID from the original index, return its ordinal in the
 sorted index. /** Given a doc ID from the original index, return its ordinal in the
     *  sorted index. */
    abstract int oldToNew(int docID);

    Given the ordinal of a doc ID, return its doc ID in the original index. /** Given the ordinal of a doc ID, return its doc ID in the original index. */
    abstract int newToOld(int docID);

    Return the number of documents in this map. This must be equal to the number of documents of the LeafReader which is sorted. /** Return the number of documents in this map. This must be equal to the
     *  {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
     *  {@link org.apache.lucene.index.LeafReader} which is sorted. */
    abstract int size();
  }

  Check consistency of a DocMap, useful for assertions. /** Check consistency of a {@link DocMap}, useful for assertions. */
  static boolean isConsistent(DocMap docMap) {
    final int maxDoc = docMap.size();
    for (int i = 0; i < maxDoc; ++i) {
      final int newID = docMap.oldToNew(i);
      final int oldID = docMap.newToOld(newID);
      assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
      assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
      if (i != oldID || newID < 0 || newID >= maxDoc) {
        return false;
      }
    }
    return true;
  }

  private static final class DocValueSorter extends TimSorter {
    
    private final int[] docs;
    private final IndexSorter.DocComparator comparator;
    private final int[] tmp;
    
    DocValueSorter(int[] docs, IndexSorter.DocComparator comparator) {
      super(docs.length / 64);
      this.docs = docs;
      this.comparator = comparator;
      tmp = new int[docs.length / 64];
    }
    
    @Override
    protected int compare(int i, int j) {
      return comparator.compare(docs[i], docs[j]);
    }
    
    @Override
    protected void swap(int i, int j) {
      int tmpDoc = docs[i];
      docs[i] = docs[j];
      docs[j] = tmpDoc;
    }

    @Override
    protected void copy(int src, int dest) {
      docs[dest] = docs[src];
    }

    @Override
    protected void save(int i, int len) {
      System.arraycopy(docs, i, tmp, 0, len);
    }

    @Override
    protected void restore(int i, int j) {
      docs[j] = tmp[i];
    }

    @Override
    protected int compareSaved(int i, int j) {
      return comparator.compare(tmp[i], docs[j]);
    }
  }

  Computes the old-to-new permutation over the given comparator. /** Computes the old-to-new permutation over the given comparator. */
  private static Sorter.DocMap sort(final int maxDoc, IndexSorter.DocComparator comparator) {
    // check if the index is sorted
    boolean sorted = true;
    for (int i = 1; i < maxDoc; ++i) {
      if (comparator.compare(i-1, i) > 0) {
        sorted = false;
        break;
      }
    }
    if (sorted) {
      return null;
    }

    // sort doc IDs
    final int[] docs = new int[maxDoc];
    for (int i = 0; i < maxDoc; i++) {
      docs[i] = i;
    }
    
    DocValueSorter sorter = new DocValueSorter(docs, comparator);
    // It can be common to sort a reader, add docs, sort it again, ... and in
    // that case timSort can save a lot of time
    sorter.sort(0, docs.length); // docs is now the newToOld mapping

    // The reason why we use MonotonicAppendingLongBuffer here is that it
    // wastes very little memory if the index is in random order but can save
    // a lot of memory if the index is already "almost" sorted
    final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    for (int i = 0; i < maxDoc; ++i) {
      newToOldBuilder.add(docs[i]);
    }
    final PackedLongValues newToOld = newToOldBuilder.build();

    // invert the docs mapping:
    for (int i = 0; i < maxDoc; ++i) {
      docs[(int) newToOld.get(i)] = i;
    } // docs is now the oldToNew mapping

    final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    for (int i = 0; i < maxDoc; ++i) {
      oldToNewBuilder.add(docs[i]);
    }
    final PackedLongValues oldToNew = oldToNewBuilder.build();
    
    return new Sorter.DocMap() {

      @Override
      public int oldToNew(int docID) {
        return (int) oldToNew.get(docID);
      }

      @Override
      public int newToOld(int docID) {
        return (int) newToOld.get(docID);
      }

      @Override
      public int size() {
        return maxDoc;
      }
    };
  }

  Returns a mapping from the old document ID to its new location in the sorted index. Implementations can use the auxiliary sort(int, DocComparator) to compute the old-to-new permutation given a list of documents and their corresponding values. 
A return value of null is allowed and means that
reader is already sorted.

NOTE: deleted documents are expected to appear in the mapping as
well, they will however be marked as deleted in the sorted view.
/**
   * Returns a mapping from the old document ID to its new location in the
   * sorted index. Implementations can use the auxiliary
   * {@link #sort(int, IndexSorter.DocComparator)} to compute the old-to-new permutation
   * given a list of documents and their corresponding values.
   * <p>
   * A return value of <tt>null</tt> is allowed and means that
   * <code>reader</code> is already sorted.
   * <p>
   * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
   * well, they will however be marked as deleted in the sorted view.
   */
  DocMap sort(LeafReader reader) throws IOException {
    SortField[] fields = sort.getSort();
    final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length];

    for (int i = 0; i < fields.length; i++) {
      IndexSorter sorter = fields[i].getIndexSorter();
      if (sorter == null) {
        throw new IllegalArgumentException("Cannot use sortfield + "  + fields[i] + " to sort indexes");
      }
      comparators[i] = sorter.getDocComparator(reader, reader.maxDoc());
    }
    return sort(reader.maxDoc(), comparators);
  }


  DocMap sort(int maxDoc, IndexSorter.DocComparator[] comparators) throws IOException {
    final IndexSorter.DocComparator comparator = (docID1, docID2) -> {
      for (int i = 0; i < comparators.length; i++) {
        int comp = comparators[i].compare(docID1, docID2);
        if (comp != 0) {
          return comp;
        }
      }
      return Integer.compare(docID1, docID2); // docid order tiebreak
    };

    return sort(maxDoc, comparator);
  }

  Returns the identifier of this Sorter. This identifier is similar to Object.hashCode() and should be chosen so that two instances of this class that sort documents likewise will have the same identifier. On the contrary, this identifier should be different on different sorts. /**
   * Returns the identifier of this {@link Sorter}.
   * <p>This identifier is similar to {@link Object#hashCode()} and should be
   * chosen so that two instances of this class that sort documents likewise
   * will have the same identifier. On the contrary, this identifier should be
   * different on different {@link Sort sorts}.
   */
  public String getID() {
    return sort.toString();
  }

  @Override
  public String toString() {
    return getID();
  }
  
}
/

org.apache.lucene/ lucene-core/ 8.7.0/ org/apache/lucene/index/Sorter.java