/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.compressing.CompressingTermVectorsFormat;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntBlockPool;

/**
 * {@link TermVectorsConsumer} that writes term vectors through a
 * {@link TrackingTmpOutputDirectoryWrapper} first and, in {@code flush}, reads them back and
 * rewrites them with the codec's term vectors format, resolving each document through the
 * {@link Sorter.DocMap} when one is supplied.
 */
final class SortingTermVectorsConsumer extends TermVectorsConsumer {

  // Format of the intermediate copy: initTermVectorsWriter() opens a writer of this format
  // on tmpDirectory, and flush() opens a reader of the same format to read the data back.
  private static final TermVectorsFormat TEMP_TERM_VECTORS_FORMAT = new CompressingTermVectorsFormat(
      "TempTermVectors", "", SortingStoredFieldsConsumer.NO_COMPRESSION, 8*1024, 10);
  // Wrapper around the consumer's directory that receives the intermediate output.
  // Assigned in initTermVectorsWriter(); flush() and abort() null-check it before use.
  TrackingTmpOutputDirectoryWrapper tmpDirectory;

  /**
   * Creates the consumer. Every argument is forwarded unchanged to the
   * {@link TermVectorsConsumer} constructor; this class adds no construction logic of its own.
   */
  SortingTermVectorsConsumer(final IntBlockPool.Allocator intBlockAllocator, final ByteBlockPool.Allocator byteBlockAllocator, Directory directory, SegmentInfo info, Codec codec) {
    super(intBlockAllocator, byteBlockAllocator, directory, info, codec);
  }

  void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
    super.flush(fieldsToFlush, state, sortMap, norms);
    if (tmpDirectory != null) {
      TermVectorsReader reader = TEMP_TERM_VECTORS_FORMAT
          .vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
      // Don't pull a merge instance, since merge instances optimize for
      // sequential access while term vectors will likely be accessed in random
      // order here.
      TermVectorsWriter writer = codec.termVectorsFormat()
          .vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
      try {
        for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
          Fields vectors = reader.get(sortMap == null ? docID : sortMap.newToOld(docID));
          writeTermVectors(writer, vectors, state.fieldInfos);
        writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
      } finally {
        IOUtils.close(reader, writer);

  void initTermVectorsWriter() throws IOException {
    if (writer == null) {
      IOContext context = new IOContext(new FlushInfo(lastDocID, bytesUsed.get()));
      tmpDirectory = new TrackingTmpOutputDirectoryWrapper(directory);
      writer = TEMP_TERM_VECTORS_FORMAT.vectorsWriter(tmpDirectory, info, context);
      lastDocID = 0;

  /**
   * Abort hook of this consumer.
   *
   * <p>NOTE(review): the body looks truncated in this copy -- the try block is empty, the
   * {@code tmpDirectory} null-check has no statements, and the closing braces are absent.
   * Presumably it aborts the parent consumer and then cleans up the temporary output;
   * confirm against the complete source before relying on this method.
   */
  public void abort() {
    try {
    } finally {
      // The finally block runs whether or not the try block throws; the check below skips
      // the (missing) cleanup when initTermVectorsWriter() never created tmpDirectory.
      if (tmpDirectory != null) {

Safe (but, slowish) default method to copy every vector field in the provided TermVectorsWriter.
/** Safe (but, slowish) default method to copy every vector field in the provided {@link TermVectorsWriter}. */
private static void writeTermVectors(TermVectorsWriter writer, Fields vectors, FieldInfos fieldInfos) throws IOException { if (vectors == null) { writer.startDocument(0); writer.finishDocument(); return; } int numFields = vectors.size(); if (numFields == -1) { // count manually! TODO: Maybe enforce that Fields.size() returns something valid? numFields = 0; for (final Iterator<String> it = vectors.iterator(); it.hasNext(); ) { it.next(); numFields++; } } writer.startDocument(numFields); String lastFieldName = null; TermsEnum termsEnum = null; PostingsEnum docsAndPositionsEnum = null; int fieldCount = 0; for(String fieldName : vectors) { fieldCount++; final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName); assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName; lastFieldName = fieldName; final Terms terms = vectors.terms(fieldName); if (terms == null) { // FieldsEnum shouldn't lie... continue; } final boolean hasPositions = terms.hasPositions(); final boolean hasOffsets = terms.hasOffsets(); final boolean hasPayloads = terms.hasPayloads(); assert !hasPayloads || hasPositions; int numTerms = (int) terms.size(); if (numTerms == -1) { // count manually. 
It is stupid, but needed, as Terms.size() is not a mandatory statistics function numTerms = 0; termsEnum = terms.iterator(); while(termsEnum.next() != null) { numTerms++; } } writer.startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads); termsEnum = terms.iterator(); int termCount = 0; while(termsEnum.next() != null) { termCount++; final int freq = (int) termsEnum.totalTermFreq(); writer.startTerm(termsEnum.term(), freq); if (hasPositions || hasOffsets) { docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS); assert docsAndPositionsEnum != null; final int docID = docsAndPositionsEnum.nextDoc(); assert docID != DocIdSetIterator.NO_MORE_DOCS; assert docsAndPositionsEnum.freq() == freq; for(int posUpto=0; posUpto<freq; posUpto++) { final int pos = docsAndPositionsEnum.nextPosition(); final int startOffset = docsAndPositionsEnum.startOffset(); final int endOffset = docsAndPositionsEnum.endOffset(); final BytesRef payload = docsAndPositionsEnum.getPayload(); assert !hasPositions || pos >= 0 ; writer.addPosition(pos, startOffset, endOffset, payload); } } writer.finishTerm(); } assert termCount == numTerms; writer.finishField(); } assert fieldCount == numFields; writer.finishDocument(); } }