/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

// TODO: break into separate freq and prox writers as
// codecs; make separate container (tii/tis/skip/*) that can
// be configured as any number of files 1..N
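//
// Rough sketch of the buffered encoding this class produces (illustrative
// only; derived from the writeVInt/writeBytes calls below). Each term gets
// up to two in-memory byte streams, selected by the first argument to
// writeVInt/writeBytes: stream 0 holds delta-coded docIDs and term freqs,
// stream 1 holds delta-coded positions with optional payloads and offsets.
// For a field indexed with freqs and positions:
//
//   stream 0: VInt(docDelta<<1 | (freq == 1 ? 1 : 0)) [VInt(freq) if freq > 1]
//   stream 1: VInt(posDelta<<1 | (payload ? 1 : 0)) [VInt(len), payload bytes]
//             [VInt(startOffsetDelta), VInt(endOffset - startOffset) if offsets]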
final class FreqProxTermsWriterPerField extends TermsHashPerField {

  private FreqProxPostingsArray freqProxPostingsArray;

  final boolean hasFreq;
  final boolean hasProx;
  final boolean hasOffsets;
  PayloadAttribute payloadAttribute;
  OffsetAttribute offsetAttribute;
  long sumTotalTermFreq;
  long sumDocFreq;

  // How many docs have this field:
  int docCount;

  
  /** Set to true if any token had a payload in the current segment. */
  boolean sawPayloads;

  public FreqProxTermsWriterPerField(FieldInvertState invertState, TermsHash termsHash, FieldInfo fieldInfo, TermsHashPerField nextPerField) {
    super(fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? 2 : 1,
          invertState, termsHash, nextPerField, fieldInfo);
    IndexOptions indexOptions = fieldInfo.getIndexOptions();
    assert indexOptions != IndexOptions.NONE;
    hasFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    hasProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  }

  @Override
  void finish() throws IOException {
    super.finish();
    sumDocFreq += fieldState.uniqueTermCount;
    sumTotalTermFreq += fieldState.length;
    if (fieldState.length > 0) {
      docCount++;
    }

    if (sawPayloads) {
      fieldInfo.setStorePayloads();
    }
  }

  @Override
  boolean start(IndexableField f, boolean first) {
    super.start(f, first);
    payloadAttribute = fieldState.payloadAttribute;
    offsetAttribute = fieldState.offsetAttribute;
    return true;
  }

  void writeProx(int termID, int proxCode) {
    if (payloadAttribute == null) {
      writeVInt(1, proxCode << 1);
    } else {
      BytesRef payload = payloadAttribute.getPayload();
      if (payload != null && payload.length > 0) {
        writeVInt(1, (proxCode << 1) | 1);
        writeVInt(1, payload.length);
        writeBytes(1, payload.bytes, payload.offset, payload.length);
        sawPayloads = true;
      } else {
        writeVInt(1, proxCode << 1);
      }
    }

    assert postingsArray == freqProxPostingsArray;
    freqProxPostingsArray.lastPositions[termID] = fieldState.position;
  }

  void writeOffsets(int termID, int offsetAccum) {
    final int startOffset = offsetAccum + offsetAttribute.startOffset();
    final int endOffset = offsetAccum + offsetAttribute.endOffset();
    assert startOffset - freqProxPostingsArray.lastOffsets[termID] >= 0;
    writeVInt(1, startOffset - freqProxPostingsArray.lastOffsets[termID]);
    writeVInt(1, endOffset - startOffset);
    freqProxPostingsArray.lastOffsets[termID] = startOffset;
  }

  @Override
  void newTerm(final int termID) {
    // First time we're seeing this term since the last flush
    final FreqProxPostingsArray postings = freqProxPostingsArray;

    postings.lastDocIDs[termID] = docState.docID;
    if (!hasFreq) {
      assert postings.termFreqs == null;
      postings.lastDocCodes[termID] = docState.docID;
      fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
    } else {
      postings.lastDocCodes[termID] = docState.docID << 1;
      postings.termFreqs[termID] = getTermFreq();
      if (hasProx) {
        writeProx(termID, fieldState.position);
        if (hasOffsets) {
          writeOffsets(termID, fieldState.offset);
        }
      } else {
        assert !hasOffsets;
      }
      fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
    }
    fieldState.uniqueTermCount++;
  }

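  // Worked example of the deferred doc encoding in addTerm (illustrative):
  // for a term occurring in doc 3 with freq 2 and then in doc 7 with freq 1,
  // doc 3 becomes final on doc 7's first occurrence and is buffered as
  // VInt(3<<1)=6, VInt(2); doc 7's pending code ((7-3)<<1)|1 = 9 (low bit
  // set: freq == 1) stays in lastDocCodes until the segment is flushed.
  // A doc's code is only written once the next doc fixes its final freq.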
  @Override
  void addTerm(final int termID) {
    final FreqProxPostingsArray postings = freqProxPostingsArray;
    assert !hasFreq || postings.termFreqs[termID] > 0;

    if (!hasFreq) {
      assert postings.termFreqs == null;
      if (termFreqAtt.getTermFrequency() != 1) {
        throw new IllegalStateException("field \"" + fieldInfo.name + "\": must index term freq while using custom TermFrequencyAttribute");
      }
      if (docState.docID != postings.lastDocIDs[termID]) {
        // New document; now encode docCode for previous doc:
        assert docState.docID > postings.lastDocIDs[termID];
        writeVInt(0, postings.lastDocCodes[termID]);
        postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
        postings.lastDocIDs[termID] = docState.docID;
        fieldState.uniqueTermCount++;
      }
    } else if (docState.docID != postings.lastDocIDs[termID]) {
      assert docState.docID > postings.lastDocIDs[termID] : "id: " + docState.docID + " postings ID: " + postings.lastDocIDs[termID] + " termID: " + termID;

      // Term not yet seen in the current doc but previously
      // seen in other doc(s) since the last flush

      // Now that we know doc freq for previous doc,
      // write it & lastDocCode
      if (1 == postings.termFreqs[termID]) {
        writeVInt(0, postings.lastDocCodes[termID] | 1);
      } else {
        writeVInt(0, postings.lastDocCodes[termID]);
        writeVInt(0, postings.termFreqs[termID]);
      }

      // Init freq for the current document
      postings.termFreqs[termID] = getTermFreq();
      fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
      postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
      postings.lastDocIDs[termID] = docState.docID;
      if (hasProx) {
        writeProx(termID, fieldState.position);
        if (hasOffsets) {
          postings.lastOffsets[termID] = 0;
          writeOffsets(termID, fieldState.offset);
        }
      } else {
        assert !hasOffsets;
      }
      fieldState.uniqueTermCount++;
    } else {
      postings.termFreqs[termID] = Math.addExact(postings.termFreqs[termID], getTermFreq());
      fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, postings.termFreqs[termID]);
      if (hasProx) {
        writeProx(termID, fieldState.position - postings.lastPositions[termID]);
        if (hasOffsets) {
          writeOffsets(termID, fieldState.offset);
        }
      }
    }
  }

  private int getTermFreq() {
    int freq = termFreqAtt.getTermFrequency();
    if (freq != 1) {
      if (hasProx) {
        throw new IllegalStateException("field \"" + fieldInfo.name + "\": cannot index positions while using custom TermFrequencyAttribute");
      }
    }
    return freq;
  }

  @Override
  public void newPostingsArray() {
    freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;
  }

  @Override
  ParallelPostingsArray createPostingsArray(int size) {
    IndexOptions indexOptions = fieldInfo.getIndexOptions();
    assert indexOptions != IndexOptions.NONE;
    boolean hasFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    boolean hasProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    boolean hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    return new FreqProxPostingsArray(size, hasFreq, hasProx, hasOffsets);
  }

  static final class FreqProxPostingsArray extends ParallelPostingsArray {
    public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
      super(size);
      if (writeFreqs) {
        termFreqs = new int[size];
      }
      lastDocIDs = new int[size];
      lastDocCodes = new int[size];
      if (writeProx) {
        lastPositions = new int[size];
        if (writeOffsets) {
          lastOffsets = new int[size];
        }
      } else {
        assert !writeOffsets;
      }
      //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
    }

    int termFreqs[];      // # times this term occurs in the current doc
    int lastDocIDs[];     // Last docID where this term occurred
    int lastDocCodes[];   // Code for prior doc
    int lastPositions[];  // Last position where this term occurred
    int lastOffsets[];    // Last endOffset where this term occurred

    @Override
    ParallelPostingsArray newInstance(int size) {
      return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
    }

    @Override
    void copyTo(ParallelPostingsArray toArray, int numToCopy) {
      assert toArray instanceof FreqProxPostingsArray;
      FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;

      super.copyTo(toArray, numToCopy);

      System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
      System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
      if (lastPositions != null) {
        assert to.lastPositions != null;
        System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
      }
      if (lastOffsets != null) {
        assert to.lastOffsets != null;
        System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
      }
      if (termFreqs != null) {
        assert to.termFreqs != null;
        System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
      }
    }

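    // Per-posting RAM estimate (below): lastDocIDs and lastDocCodes always
    // exist, adding 2 * Integer.BYTES on top of the base BYTES_PER_POSTING;
    // each optional array (positions, offsets, freqs) adds another
    // Integer.BYTES. E.g. a field indexed with freqs, positions, and offsets
    // costs BYTES_PER_POSTING + 5 * Integer.BYTES per unique term.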
    @Override
    int bytesPerPosting() {
      int bytes = ParallelPostingsArray.BYTES_PER_POSTING + 2 * Integer.BYTES;
      if (lastPositions != null) {
        bytes += Integer.BYTES;
      }
      if (lastOffsets != null) {
        bytes += Integer.BYTES;
      }
      if (termFreqs != null) {
        bytes += Integer.BYTES;
      }
      return bytes;
    }
  }
}