/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.automaton.CompiledAutomaton;

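/**
 * Gathers the in-memory postings accumulated by {@link FreqProxTermsWriterPerField}
 * and, on flush, writes them to the codec's {@link FieldsConsumer}, applying any
 * buffered term deletes and remapping doc IDs when the segment is index-sorted.
 */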
final class FreqProxTermsWriter extends TermsHash {

  FreqProxTermsWriter(final IntBlockPool.Allocator intBlockAllocator, final ByteBlockPool.Allocator byteBlockAllocator, Counter bytesUsed, TermsHash termVectors) {
    super(intBlockAllocator, byteBlockAllocator, bytesUsed, termVectors);
  }

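  /**
   * Marks documents matching buffered term deletes as deleted in the segment's
   * live-docs bitset; only documents indexed before the delete arrived (doc IDs
   * below the recorded per-term limit) are affected.
   */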
  private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
    // Process any pending Term deletes for this newly
    // flushed segment:
    if (state.segUpdates != null && state.segUpdates.deleteTerms.size() > 0) {
      Map<Term,Integer> segDeletes = state.segUpdates.deleteTerms;
      List<Term> deleteTerms = new ArrayList<>(segDeletes.keySet());
      Collections.sort(deleteTerms);
      FrozenBufferedUpdates.TermDocsIterator iterator = new FrozenBufferedUpdates.TermDocsIterator(fields, true);
      for(Term deleteTerm : deleteTerms) {
        DocIdSetIterator postings = iterator.nextTerm(deleteTerm.field(), deleteTerm.bytes());
        if (postings != null) {
          int delDocLimit = segDeletes.get(deleteTerm);
          assert delDocLimit < PostingsEnum.NO_MORE_DOCS;
          int doc;
          while ((doc = postings.nextDoc()) < delDocLimit) {
            if (state.liveDocs == null) {
              state.liveDocs = new FixedBitSet(state.segmentInfo.maxDoc());
              state.liveDocs.set(0, state.segmentInfo.maxDoc());
            }
            if (state.liveDocs.get(doc)) {
              state.delCountOnFlush++;
              state.liveDocs.clear(doc);
            }
          }
        }
      }
    }
  }

  @Override
  public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
      Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
    super.flush(fieldsToFlush, state, sortMap, norms);

    // Gather all fields that saw any postings:
    List<FreqProxTermsWriterPerField> allFields = new ArrayList<>();

    for (TermsHashPerField f : fieldsToFlush.values()) {
      final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
      if (perField.getNumTerms() > 0) {
        perField.sortTerms();
        assert perField.indexOptions != IndexOptions.NONE;
        allFields.add(perField);
      }
    }

    // Sort by field name
    CollectionUtil.introSort(allFields);

    Fields fields = new FreqProxFields(allFields);
    applyDeletes(state, fields);
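    // If the segment must be written in index-sorted order, wrap the postings so
    // that every TermsEnum remaps doc IDs through the sort's DocMap: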
    if (sortMap != null) {
      final Sorter.DocMap docMap = sortMap;
      final FieldInfos infos = state.fieldInfos;
      fields = new FilterLeafReader.FilterFields(fields) {

        @Override
        public Terms terms(final String field) throws IOException {
          Terms terms = in.terms(field);
          if (terms == null) {
            return null;
          } else {
            return new SortingTerms(terms, infos.fieldInfo(field).getIndexOptions(), docMap);
          }
        }
      };
    }

    FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
    boolean success = false;
    try {
      consumer.write(fields, norms);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(consumer);
      } else {
        IOUtils.closeWhileHandlingException(consumer);
      }
    }

  }

  @Override
  public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) {
    return new FreqProxTermsWriterPerField(invertState, this, fieldInfo, nextTermsHash.addField(invertState, fieldInfo));
  }

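  /**
   * Wraps {@link Terms} so that returned {@link TermsEnum}s produce postings
   * whose doc IDs are remapped through the given {@link Sorter.DocMap}, letting
   * an index-sorted segment be written from unsorted in-memory postings.
   */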
  static class SortingTerms extends FilterLeafReader.FilterTerms {

    private final Sorter.DocMap docMap;
    private final IndexOptions indexOptions;

    SortingTerms(final Terms in, IndexOptions indexOptions, final Sorter.DocMap docMap) {
      super(in);
      this.docMap = docMap;
      this.indexOptions = indexOptions;
    }

    @Override
    public TermsEnum iterator() throws IOException {
      return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions());
    }

    @Override
    public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
        throws IOException {
      return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
    }

  }

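  /**
   * A {@link FilterLeafReader.FilterTermsEnum} whose
   * {@link #postings(PostingsEnum, int)} method returns doc-ID-sorted views:
   * {@link SortingPostingsEnum} when positions are requested and indexed,
   * {@link SortingDocsEnum} otherwise.
   */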
  private static class SortingTermsEnum extends FilterLeafReader.FilterTermsEnum {

    final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
    private final IndexOptions indexOptions;
    private final boolean hasPositions;

    SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
      super(in);
      this.docMap = docMap;
      this.indexOptions = indexOptions;
      this.hasPositions = hasPositions;
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException {

      if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
        final PostingsEnum inReuse;
        final SortingPostingsEnum wrapReuse;
        if (reuse != null && reuse instanceof SortingPostingsEnum) {
          // if we're asked to reuse the given PostingsEnum and it is a sorting one,
          // unwrap it and reuse the inner enum, since some Codecs expect their own
          // enums back for reuse.
          wrapReuse = (SortingPostingsEnum) reuse;
          inReuse = wrapReuse.getWrapped();
        } else {
          wrapReuse = null;
          inReuse = reuse;
        }

        final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
        // we ignore the fact that offsets may be stored but not asked for,
        // since this code is expected to be used during addIndexes which will
        // ask for everything. if that assumption changes in the future, we can
        // factor in whether 'flags' says offsets are not required.
        final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
        return new SortingPostingsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets);
      }

      final PostingsEnum inReuse;
      final SortingDocsEnum wrapReuse;
      if (reuse != null && reuse instanceof SortingDocsEnum) {
        // if we're asked to reuse the given PostingsEnum and it is a sorting one,
        // unwrap it and reuse the inner enum, since some Codecs expect their own
        // enums back for reuse.
        wrapReuse = (SortingDocsEnum) reuse;
        inReuse = wrapReuse.getWrapped();
      } else {
        wrapReuse = null;
        inReuse = reuse;
      }

      final PostingsEnum inDocs = in.postings(inReuse, flags);
      final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
      return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
    }

  }

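  /**
   * A {@link PostingsEnum} that eagerly consumes the wrapped enum, remapping each
   * doc ID through the {@link Sorter.DocMap} and buffering docs (and optionally
   * freqs) so they can be replayed in the new order; position data is not kept.
   */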
  static class SortingDocsEnum extends FilterLeafReader.FilterPostingsEnum {

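    /**
     * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
     * freqs in one go. Every time a doc ID is 'swapped', its corresponding
     * freq is swapped too.
     */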
    private static final class DocFreqSorter extends TimSorter {

      private int[] docs;
      private int[] freqs;
      private final int[] tmpDocs;
      private int[] tmpFreqs;

      DocFreqSorter(int maxDoc) {
        super(maxDoc / 64);
        this.tmpDocs = new int[maxDoc / 64];
      }

      public void reset(int[] docs, int[] freqs) {
        this.docs = docs;
        this.freqs = freqs;
        if (freqs != null && tmpFreqs == null) {
          tmpFreqs = new int[tmpDocs.length];
        }
      }

      @Override
      protected int compare(int i, int j) {
        return docs[i] - docs[j];
      }

      @Override
      protected void swap(int i, int j) {
        int tmpDoc = docs[i];
        docs[i] = docs[j];
        docs[j] = tmpDoc;

        if (freqs != null) {
          int tmpFreq = freqs[i];
          freqs[i] = freqs[j];
          freqs[j] = tmpFreq;
        }
      }

      @Override
      protected void copy(int src, int dest) {
        docs[dest] = docs[src];
        if (freqs != null) {
          freqs[dest] = freqs[src];
        }
      }

      @Override
      protected void save(int i, int len) {
        System.arraycopy(docs, i, tmpDocs, 0, len);
        if (freqs != null) {
          System.arraycopy(freqs, i, tmpFreqs, 0, len);
        }
      }

      @Override
      protected void restore(int i, int j) {
        docs[j] = tmpDocs[i];
        if (freqs != null) {
          freqs[j] = tmpFreqs[i];
        }
      }

      @Override
      protected int compareSaved(int i, int j) {
        return tmpDocs[i] - docs[j];
      }
    }

    private final int maxDoc;
    private final DocFreqSorter sorter;
    private int[] docs;
    private int[] freqs;
    private int docIt = -1;
    private final int upto;
    private final boolean withFreqs;

    SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
      super(in);
      this.maxDoc = maxDoc;
      this.withFreqs = withFreqs;
      if (reuse != null) {
        if (reuse.maxDoc == maxDoc) {
          sorter = reuse.sorter;
        } else {
          sorter = new DocFreqSorter(maxDoc);
        }
        docs = reuse.docs;
        freqs = reuse.freqs; // maybe null
      } else {
        docs = new int[64];
        sorter = new DocFreqSorter(maxDoc);
      }
      docIt = -1;
      int i = 0;
      int doc;
      if (withFreqs) {
        if (freqs == null || freqs.length < docs.length) {
          freqs = new int[docs.length];
        }
        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          if (i >= docs.length) {
            docs = ArrayUtil.grow(docs, docs.length + 1);
            freqs = ArrayUtil.grow(freqs, freqs.length + 1);
          }
          docs[i] = docMap.oldToNew(doc);
          freqs[i] = in.freq();
          ++i;
        }
      } else {
        freqs = null;
        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          if (i >= docs.length) {
            docs = ArrayUtil.grow(docs, docs.length + 1);
          }
          docs[i++] = docMap.oldToNew(doc);
        }
      }
      // TimSort can save much time compared to other sorts in case of
      // reverse sorting, or when sorting a concatenation of sorted readers
      sorter.reset(docs, freqs);
      sorter.sort(0, i);
      upto = i;
    }

    // for testing
    boolean reused(PostingsEnum other) {
      if (other == null || !(other instanceof SortingDocsEnum)) {
        return false;
      }
      return docs == ((SortingDocsEnum) other).docs;
    }

    @Override
    public int advance(final int target) throws IOException {
      // need to support it for checkIndex, but in practice it won't be called, so
      // don't bother to implement efficiently for now.
      return slowAdvance(target);
    }

    @Override
    public int docID() {
      return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
    }

    @Override
    public int freq() throws IOException {
      return withFreqs && docIt < upto ? freqs[docIt] : 1;
    }

    @Override
    public int nextDoc() throws IOException {
      if (++docIt >= upto) return NO_MORE_DOCS;
      return docs[docIt];
    }

    /** Returns the wrapped {@link PostingsEnum}. */
    PostingsEnum getWrapped() {
      return in;
    }

    // we buffer up docs/freqs only, don't forward any positions requests to underlying enum

    @Override
    public int nextPosition() throws IOException {
      return -1;
    }

    @Override
    public int startOffset() throws IOException {
      return -1;
    }

    @Override
    public int endOffset() throws IOException {
      return -1;
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }
  }

  static class SortingPostingsEnum extends FilterLeafReader.FilterPostingsEnum {

    /**
     * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
     * offsets in one go. Every time a doc ID is 'swapped', its corresponding
     * offset is swapped too.
     */
    private static final class DocOffsetSorter extends TimSorter {

      private int[] docs;
      private long[] offsets;
      private final int[] tmpDocs;
      private final long[] tmpOffsets;

      public DocOffsetSorter(int maxDoc) {
        super(maxDoc / 64);
        this.tmpDocs = new int[maxDoc / 64];
        this.tmpOffsets = new long[maxDoc / 64];
      }

      public void reset(int[] docs, long[] offsets) {
        this.docs = docs;
        this.offsets = offsets;
      }

      @Override
      protected int compare(int i, int j) {
        return docs[i] - docs[j];
      }

      @Override
      protected void swap(int i, int j) {
        int tmpDoc = docs[i];
        docs[i] = docs[j];
        docs[j] = tmpDoc;

        long tmpOffset = offsets[i];
        offsets[i] = offsets[j];
        offsets[j] = tmpOffset;
      }

      @Override
      protected void copy(int src, int dest) {
        docs[dest] = docs[src];
        offsets[dest] = offsets[src];
      }

      @Override
      protected void save(int i, int len) {
        System.arraycopy(docs, i, tmpDocs, 0, len);
        System.arraycopy(offsets, i, tmpOffsets, 0, len);
      }

      @Override
      protected void restore(int i, int j) {
        docs[j] = tmpDocs[i];
        offsets[j] = tmpOffsets[i];
      }

      @Override
      protected int compareSaved(int i, int j) {
        return tmpDocs[i] - docs[j];
      }
    }

    private final int maxDoc;
    private final DocOffsetSorter sorter;
    private int[] docs;
    private long[] offsets;
    private final int upto;

    private final ByteBuffersDataInput postingInput;
    private final boolean storeOffsets;

    private int docIt = -1;
    private int pos;
    private int startOffset = -1;
    private int endOffset = -1;
    private final BytesRef payload;
    private int currFreq;

    private final ByteBuffersDataOutput buffer;

    SortingPostingsEnum(int maxDoc, SortingPostingsEnum reuse, final PostingsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
      super(in);
      this.maxDoc = maxDoc;
      this.storeOffsets = storeOffsets;
      if (reuse != null) {
        docs = reuse.docs;
        offsets = reuse.offsets;
        payload = reuse.payload;
        buffer = reuse.buffer;
        buffer.reset();
        if (reuse.maxDoc == maxDoc) {
          sorter = reuse.sorter;
        } else {
          sorter = new DocOffsetSorter(maxDoc);
        }
      } else {
        docs = new int[32];
        offsets = new long[32];
        payload = new BytesRef(32);
        buffer = ByteBuffersDataOutput.newResettableInstance();
        sorter = new DocOffsetSorter(maxDoc);
      }

      int doc;
      int i = 0;
      while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (i == docs.length) {
          final int newLength = ArrayUtil.oversize(i + 1, 4);
          docs = ArrayUtil.growExact(docs, newLength);
          offsets = ArrayUtil.growExact(offsets, newLength);
        }
        docs[i] = docMap.oldToNew(doc);
        offsets[i] = buffer.size();
        addPositions(in, buffer);
        i++;
      }
      upto = i;
      sorter.reset(docs, offsets);
      sorter.sort(0, upto);

      this.postingInput = buffer.toDataInput();
    }

    // for testing
    boolean reused(PostingsEnum other) {
      if (other == null || !(other instanceof SortingPostingsEnum)) {
        return false;
      }
      return docs == ((SortingPostingsEnum) other).docs;
    }

    private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
      int freq = in.freq();
      out.writeVInt(freq);
      int previousPosition = 0;
      int previousEndOffset = 0;
      for (int i = 0; i < freq; i++) {
        final int pos = in.nextPosition();
        final BytesRef payload = in.getPayload();
        // The low-order bit of token is set only if there is a payload, the
        // previous bits are the delta-encoded position.
        final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
        out.writeVInt(token);
        previousPosition = pos;
        if (storeOffsets) { // don't encode offsets if they are not stored
          final int startOffset = in.startOffset();
          final int endOffset = in.endOffset();
          out.writeVInt(startOffset - previousEndOffset);
          out.writeVInt(endOffset - startOffset);
          previousEndOffset = endOffset;
        }
        if (payload != null) {
          out.writeVInt(payload.length);
          out.writeBytes(payload.bytes, payload.offset, payload.length);
        }
      }
    }

    @Override
    public int advance(final int target) throws IOException {
      // need to support it for checkIndex, but in practice it won't be called, so
      // don't bother to implement efficiently for now.
      return slowAdvance(target);
    }

    @Override
    public int docID() {
      return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
    }

    @Override
    public int endOffset() throws IOException {
      return endOffset;
    }

    @Override
    public int freq() throws IOException {
      return currFreq;
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return payload.length == 0 ? null : payload;
    }

    @Override
    public int nextDoc() throws IOException {
      if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS;
      postingInput.seek(offsets[docIt]);
      currFreq = postingInput.readVInt();
      // reset variables used in nextPosition
      pos = 0;
      endOffset = 0;
      return docs[docIt];
    }

    @Override
    public int nextPosition() throws IOException {
      final int token = postingInput.readVInt();
      pos += token >>> 1;
      if (storeOffsets) {
        startOffset = endOffset + postingInput.readVInt();
        endOffset = startOffset + postingInput.readVInt();
      }
      if ((token & 1) != 0) {
        payload.offset = 0;
        payload.length = postingInput.readVInt();
        if (payload.length > payload.bytes.length) {
          payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
        }
        postingInput.readBytes(payload.bytes, 0, payload.length);
      } else {
        payload.length = 0;
      }
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return startOffset;
    }

    /** Returns the wrapped {@link PostingsEnum}. */
    PostingsEnum getWrapped() {
      return in;
    }
  }
}