package org.apache.lucene.index;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
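/** Default general purpose indexing chain, which handles indexing all types of fields. */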
final class DefaultIndexingChain extends DocConsumer {
final Counter bytesUsed;
final DocumentsWriterPerThread.DocState docState;
final DocumentsWriterPerThread docWriter;
final FieldInfos.Builder fieldInfos;
final TermsHash termsHash;
final StoredFieldsConsumer storedFieldsConsumer;
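// Hash table of all fields seen so far in this segment, keyed by field name and chained via PerField.next: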
private PerField[] fieldHash = new PerField[2];
private int hashMask = 1;
private int totalFieldCount;
private long nextFieldGen;
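// Holds the indexed fields seen in the current document: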
private PerField[] fields = new PerField[1];
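// Names of doc values fields already finished by maybeSortSegment, so writeDocValues does not finish them twice: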
private final Set<String> finishedDocValues = new HashSet<>();
public DefaultIndexingChain(DocumentsWriterPerThread docWriter) throws IOException {
this.docWriter = docWriter;
this.fieldInfos = docWriter.getFieldInfosBuilder();
this.docState = docWriter.docState;
this.bytesUsed = docWriter.bytesUsed;
final TermsHash termVectorsWriter;
if (docWriter.getSegmentInfo().getIndexSort() == null) {
storedFieldsConsumer = new StoredFieldsConsumer(docWriter);
termVectorsWriter = new TermVectorsConsumer(docWriter);
} else {
storedFieldsConsumer = new SortingStoredFieldsConsumer(docWriter);
termVectorsWriter = new SortingTermVectorsConsumer(docWriter);
}
termsHash = new FreqProxTermsWriter(docWriter, termVectorsWriter);
}
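/** If the segment is configured with an index sort, finishes the doc values writers of the sort fields and computes a DocMap from old docIDs to sorted docIDs; returns null if the segment has no index sort. */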
private Sorter.DocMap maybeSortSegment(SegmentWriteState state) throws IOException {
Sort indexSort = state.segmentInfo.getIndexSort();
if (indexSort == null) {
return null;
}
List<Sorter.DocComparator> comparators = new ArrayList<>();
for (int i = 0; i < indexSort.getSort().length; i++) {
SortField sortField = indexSort.getSort()[i];
PerField perField = getPerField(sortField.getField());
if (perField != null && perField.docValuesWriter != null &&
finishedDocValues.contains(perField.fieldInfo.name) == false) {
perField.docValuesWriter.finish(state.segmentInfo.maxDoc());
Sorter.DocComparator cmp = perField.docValuesWriter.getDocComparator(state.segmentInfo.maxDoc(), sortField);
comparators.add(cmp);
finishedDocValues.add(perField.fieldInfo.name);
}
}
Sorter sorter = new Sorter(indexSort);
return sorter.sort(state.segmentInfo.maxDoc(), comparators.toArray(new Sorter.DocComparator[comparators.size()]));
}
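/** Flushes all buffered state for this segment: norms, doc values, points, stored fields, postings and term vectors, and finally field infos; returns the sort map, or null if the segment was not sorted. */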
@Override
public Sorter.DocMap flush(SegmentWriteState state) throws IOException {
Sorter.DocMap sortMap = maybeSortSegment(state);
int maxDoc = state.segmentInfo.maxDoc();
long t0 = System.nanoTime();
writeNorms(state, sortMap);
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write norms");
}
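// A read-time view of the segment, used below to open a NormsProducer over the norms that were just written: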
SegmentReadState readState = new SegmentReadState(state.directory, state.segmentInfo, state.fieldInfos, true, IOContext.READ, state.segmentSuffix, Collections.emptyMap());
t0 = System.nanoTime();
writeDocValues(state, sortMap);
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write docValues");
}
t0 = System.nanoTime();
writePoints(state, sortMap);
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write points");
}
t0 = System.nanoTime();
storedFieldsConsumer.finish(maxDoc);
storedFieldsConsumer.flush(state, sortMap);
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to finish stored fields");
}
t0 = System.nanoTime();
Map<String,TermsHashPerField> fieldsToFlush = new HashMap<>();
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.invertState != null) {
fieldsToFlush.put(perField.fieldInfo.name, perField.termsHashPerField);
}
perField = perField.next;
}
}
try (NormsProducer norms = readState.fieldInfos.hasNorms()
? state.segmentInfo.getCodec().normsFormat().normsProducer(readState)
: null) {
NormsProducer normsMergeInstance = null;
if (norms != null) {
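// Use the merge instance of the norms producer, which is optimized for the sequential access pattern of the postings flush: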
normsMergeInstance = norms.getMergeInstance();
}
termsHash.flush(fieldsToFlush, state, sortMap, normsMergeInstance);
}
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write postings and finish vectors");
}
t0 = System.nanoTime();
docWriter.codec.fieldInfosFormat().write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write fieldInfos");
}
return sortMap;
}
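/** Writes all buffered points, verifying that the per-field point writers agree with the dimension counts recorded in FieldInfos. */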
private void writePoints(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
PointsWriter pointsWriter = null;
boolean success = false;
try {
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.pointValuesWriter != null) {
if (perField.fieldInfo.getPointDataDimensionCount() == 0) {
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has no points but wrote them");
}
if (pointsWriter == null) {
PointsFormat fmt = state.segmentInfo.getCodec().pointsFormat();
if (fmt == null) {
throw new IllegalStateException("field=\"" + perField.fieldInfo.name + "\" was indexed as points but codec does not support points");
}
pointsWriter = fmt.fieldsWriter(state);
}
perField.pointValuesWriter.flush(state, sortMap, pointsWriter);
perField.pointValuesWriter = null;
} else if (perField.fieldInfo.getPointDataDimensionCount() != 0) {
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has points but did not write them");
}
perField = perField.next;
}
}
if (pointsWriter != null) {
pointsWriter.finish();
}
success = true;
} finally {
if (success) {
IOUtils.close(pointsWriter);
} else {
IOUtils.closeWhileHandlingException(pointsWriter);
}
}
}
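/** Writes all buffered doc values; writers already finished by maybeSortSegment are not finished a second time. */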
private void writeDocValues(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
int maxDoc = state.segmentInfo.maxDoc();
DocValuesConsumer dvConsumer = null;
boolean success = false;
try {
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.docValuesWriter != null) {
if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has no docValues but wrote them");
}
if (dvConsumer == null) {
DocValuesFormat fmt = state.segmentInfo.getCodec().docValuesFormat();
dvConsumer = fmt.fieldsConsumer(state);
}
if (finishedDocValues.contains(perField.fieldInfo.name) == false) {
perField.docValuesWriter.finish(maxDoc);
}
perField.docValuesWriter.flush(state, sortMap, dvConsumer);
perField.docValuesWriter = null;
} else if (perField.fieldInfo.getDocValuesType() != DocValuesType.NONE) {
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has docValues but did not write them");
}
perField = perField.next;
}
}
success = true;
} finally {
if (success) {
IOUtils.close(dvConsumer);
} else {
IOUtils.closeWhileHandlingException(dvConsumer);
}
}
if (state.fieldInfos.hasDocValues() == false) {
if (dvConsumer != null) {
throw new AssertionError("segment=" + state.segmentInfo + ": fieldInfos has no docValues but wrote them");
}
} else if (dvConsumer == null) {
throw new AssertionError("segment=" + state.segmentInfo + ": fieldInfos has docValues but did not write them");
}
}
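/** Writes all buffered norms for fields that are indexed and do not omit norms. */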
private void writeNorms(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
boolean success = false;
NormsConsumer normsConsumer = null;
try {
if (state.fieldInfos.hasNorms()) {
NormsFormat normsFormat = state.segmentInfo.getCodec().normsFormat();
assert normsFormat != null;
normsConsumer = normsFormat.normsConsumer(state);
for (FieldInfo fi : state.fieldInfos) {
PerField perField = getPerField(fi.name);
assert perField != null;
if (fi.omitsNorms() == false && fi.getIndexOptions() != IndexOptions.NONE) {
assert perField.norms != null: "field=" + fi.name;
perField.norms.finish(state.segmentInfo.maxDoc());
perField.norms.flush(state, sortMap, normsConsumer);
}
}
}
success = true;
} finally {
if (success) {
IOUtils.close(normsConsumer);
} else {
IOUtils.closeWhileHandlingException(normsConsumer);
}
}
}
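/** Aborts all buffered state: the terms hash and stored fields consumer are aborted, and the field hash is cleared. */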
@Override
@SuppressWarnings("try")
public void abort() throws IOException {
try (Closeable finalizer = termsHash::abort) {
storedFieldsConsumer.abort();
} finally {
Arrays.fill(fieldHash, null);
}
}
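/** Doubles the size of the field hash table and redistributes the existing PerField chains. */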
private void rehash() {
int newHashSize = (fieldHash.length*2);
assert newHashSize > fieldHash.length;
PerField[] newHashArray = new PerField[newHashSize];
int newHashMask = newHashSize-1;
for (int j = 0; j < fieldHash.length; j++) {
PerField fp0 = fieldHash[j];
while (fp0 != null) {
final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
PerField nextFP0 = fp0.next;
fp0.next = newHashArray[hashPos2];
newHashArray[hashPos2] = fp0;
fp0 = nextFP0;
}
}
fieldHash = newHashArray;
hashMask = newHashMask;
}
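/** Starts a stored-fields document, recording any failure as an aborting exception on the doc writer. */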
private void startStoredFields(int docID) throws IOException {
try {
storedFieldsConsumer.startDocument(docID);
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
private void finishStoredFields() throws IOException {
try {
storedFieldsConsumer.finishDocument();
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
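/** Indexes one document: inverts, stores, and buffers doc values and points for each of its fields, then finishes per-field and per-document state. */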
@Override
public void processDocument() throws IOException {
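// Number of distinct indexed field names seen so far in this document: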
int fieldCount = 0;
long fieldGen = nextFieldGen++;
termsHash.startDocument();
startStoredFields(docState.docID);
try {
for (IndexableField field : docState.doc) {
fieldCount = processField(field, fieldGen, fieldCount);
}
} finally {
if (docWriter.hasHitAbortingException() == false) {
for (int i = 0; i < fieldCount; i++) {
fields[i].finish();
}
finishStoredFields();
}
}
try {
termsHash.finishDocument();
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
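/** Processes one field of the current document: inverts it if indexed, writes it if stored, and buffers its doc values and points; returns the updated count of distinct indexed fields seen in this document. */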
private int processField(IndexableField field, long fieldGen, int fieldCount) throws IOException {
String fieldName = field.name();
IndexableFieldType fieldType = field.fieldType();
PerField fp = null;
if (fieldType.indexOptions() == null) {
throw new NullPointerException("IndexOptions must not be null (field: \"" + field.name() + "\")");
}
if (fieldType.indexOptions() != IndexOptions.NONE) {
fp = getOrAddField(fieldName, fieldType, true);
boolean first = fp.fieldGen != fieldGen;
fp.invert(field, first);
if (first) {
fields[fieldCount++] = fp;
fp.fieldGen = fieldGen;
}
} else {
verifyUnIndexedFieldType(fieldName, fieldType);
}
if (fieldType.stored()) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
String value = field.stringValue();
if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
}
try {
storedFieldsConsumer.writeField(fp.fieldInfo, field);
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
DocValuesType dvType = fieldType.docValuesType();
if (dvType == null) {
throw new NullPointerException("docValuesType must not be null (field: \"" + fieldName + "\")");
}
if (dvType != DocValuesType.NONE) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
indexDocValue(fp, dvType, field);
}
if (fieldType.pointDataDimensionCount() != 0) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
indexPoint(fp, field);
}
return fieldCount;
}
private static void verifyUnIndexedFieldType(String name, IndexableFieldType ft) {
if (ft.storeTermVectors()) {
throw new IllegalArgumentException("cannot store term vectors "
+ "for a field that is not indexed (field=\"" + name + "\")");
}
if (ft.storeTermVectorPositions()) {
throw new IllegalArgumentException("cannot store term vector positions "
+ "for a field that is not indexed (field=\"" + name + "\")");
}
if (ft.storeTermVectorOffsets()) {
throw new IllegalArgumentException("cannot store term vector offsets "
+ "for a field that is not indexed (field=\"" + name + "\")");
}
if (ft.storeTermVectorPayloads()) {
throw new IllegalArgumentException("cannot store term vector payloads "
+ "for a field that is not indexed (field=\"" + name + "\")");
}
}
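/** Buffers one point value for this field, registering its dimension counts with the global field numbers on first use. */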
private void indexPoint(PerField fp, IndexableField field) throws IOException {
int pointDataDimensionCount = field.fieldType().pointDataDimensionCount();
int pointIndexDimensionCount = field.fieldType().pointIndexDimensionCount();
int dimensionNumBytes = field.fieldType().pointNumBytes();
if (fp.fieldInfo.getPointDataDimensionCount() == 0) {
fieldInfos.globalFieldNumbers.setDimensions(fp.fieldInfo.number, fp.fieldInfo.name, pointDataDimensionCount, pointIndexDimensionCount, dimensionNumBytes);
}
fp.fieldInfo.setPointDimensions(pointDataDimensionCount, pointIndexDimensionCount, dimensionNumBytes);
if (fp.pointValuesWriter == null) {
fp.pointValuesWriter = new PointValuesWriter(docWriter, fp.fieldInfo);
}
fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
}
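/** Checks that a field used by the index sort has a doc values type compatible with its SortField, throwing IllegalArgumentException otherwise. */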
private void validateIndexSortDVType(Sort indexSort, String fieldName, DocValuesType dvType) {
for (SortField sortField : indexSort.getSort()) {
if (sortField.getField().equals(fieldName)) {
switch (dvType) {
case NUMERIC:
if (sortField.getType().equals(SortField.Type.INT) == false &&
sortField.getType().equals(SortField.Type.LONG) == false &&
sortField.getType().equals(SortField.Type.FLOAT) == false &&
sortField.getType().equals(SortField.Type.DOUBLE) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case BINARY:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
case SORTED:
if (sortField.getType().equals(SortField.Type.STRING) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_NUMERIC:
if (sortField instanceof SortedNumericSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_SET:
if (sortField instanceof SortedSetSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
default:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
}
}
}
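/** Buffers one doc values value for this field, validating it against the index sort and registering the doc values type on first use. */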
private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException {
if (fp.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
if (docWriter.getSegmentInfo().getIndexSort() != null) {
final Sort indexSort = docWriter.getSegmentInfo().getIndexSort();
validateIndexSortDVType(indexSort, fp.fieldInfo.name, dvType);
}
fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType);
}
fp.fieldInfo.setDocValuesType(dvType);
int docID = docState.docID;
switch(dvType) {
case NUMERIC:
if (fp.docValuesWriter == null) {
fp.docValuesWriter = new NumericDocValuesWriter(fp.fieldInfo, bytesUsed);
}
if (field.numericValue() == null) {
throw new IllegalArgumentException("field=\"" + fp.fieldInfo.name + "\": null value not allowed");
}
((NumericDocValuesWriter) fp.docValuesWriter).addValue(docID, field.numericValue().longValue());
break;
case BINARY:
if (fp.docValuesWriter == null) {
fp.docValuesWriter = new BinaryDocValuesWriter(fp.fieldInfo, bytesUsed);
}
((BinaryDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
break;
case SORTED:
if (fp.docValuesWriter == null) {
fp.docValuesWriter = new SortedDocValuesWriter(fp.fieldInfo, bytesUsed);
}
((SortedDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
break;
case SORTED_NUMERIC:
if (fp.docValuesWriter == null) {
fp.docValuesWriter = new SortedNumericDocValuesWriter(fp.fieldInfo, bytesUsed);
}
((SortedNumericDocValuesWriter) fp.docValuesWriter).addValue(docID, field.numericValue().longValue());
break;
case SORTED_SET:
if (fp.docValuesWriter == null) {
fp.docValuesWriter = new SortedSetDocValuesWriter(fp.fieldInfo, bytesUsed);
}
((SortedSetDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
break;
default:
throw new AssertionError("unrecognized DocValues.Type: " + dvType);
}
}
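/** Returns the previously created PerField for this field name, or null if the field was not seen yet. */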
private PerField getPerField(String name) {
final int hashPos = name.hashCode() & hashMask;
PerField fp = fieldHash[hashPos];
while (fp != null && !fp.fieldInfo.name.equals(name)) {
fp = fp.next;
}
return fp;
}
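/** Returns the PerField for this field name, creating it on first sight; when invert is true, inverted-index state is also initialized. */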
private PerField getOrAddField(String name, IndexableFieldType fieldType, boolean invert) {
final int hashPos = name.hashCode() & hashMask;
PerField fp = fieldHash[hashPos];
while (fp != null && !fp.fieldInfo.name.equals(name)) {
fp = fp.next;
}
if (fp == null) {
FieldInfo fi = fieldInfos.getOrAdd(name);
initIndexOptions(fi, fieldType.indexOptions());
Map<String, String> attributes = fieldType.getAttributes();
if (attributes != null) {
attributes.forEach((k, v) -> fi.putAttribute(k, v));
}
fp = new PerField(docWriter.getIndexCreatedVersionMajor(), fi, invert);
fp.next = fieldHash[hashPos];
fieldHash[hashPos] = fp;
totalFieldCount++;
if (totalFieldCount >= fieldHash.length/2) {
rehash();
}
if (totalFieldCount > fields.length) {
PerField[] newFields = new PerField[ArrayUtil.oversize(totalFieldCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(fields, 0, newFields, 0, fields.length);
fields = newFields;
}
} else if (invert && fp.invertState == null) {
initIndexOptions(fp.fieldInfo, fieldType.indexOptions());
fp.setInvertState();
}
return fp;
}
private void initIndexOptions(FieldInfo info, IndexOptions indexOptions) {
assert info.getIndexOptions() == IndexOptions.NONE;
fieldInfos.globalFieldNumbers.setIndexOptions(info.number, info.name, indexOptions);
info.setIndexOptions(indexOptions);
}
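/** Per-field indexing state: the inverted-index, doc values, points, and norms writers for one field in this segment. */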
private final class PerField implements Comparable<PerField> {
final int indexCreatedVersionMajor;
final FieldInfo fieldInfo;
final Similarity similarity;
FieldInvertState invertState;
TermsHashPerField termsHashPerField;
DocValuesWriter docValuesWriter;
PointValuesWriter pointValuesWriter;
long fieldGen = -1;
PerField next;
NormValuesWriter norms;
TokenStream tokenStream;
public PerField(int indexCreatedVersionMajor, FieldInfo fieldInfo, boolean invert) {
this.indexCreatedVersionMajor = indexCreatedVersionMajor;
this.fieldInfo = fieldInfo;
similarity = docState.similarity;
if (invert) {
setInvertState();
}
}
void setInvertState() {
invertState = new FieldInvertState(indexCreatedVersionMajor, fieldInfo.name, fieldInfo.getIndexOptions());
termsHashPerField = termsHash.addField(invertState, fieldInfo);
if (fieldInfo.omitsNorms() == false) {
assert norms == null;
norms = new NormValuesWriter(fieldInfo, docState.docWriter.bytesUsed);
}
}
@Override
public int compareTo(PerField other) {
return this.fieldInfo.name.compareTo(other.fieldInfo.name);
}
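/** Called once per indexed field per document, after inversion completes; computes and buffers the field's norm, then finishes the terms hash for this field. */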
public void finish() throws IOException {
if (fieldInfo.omitsNorms() == false) {
long normValue;
if (invertState.length == 0) {
normValue = 0;
} else {
normValue = similarity.computeNorm(invertState);
if (normValue == 0) {
throw new IllegalStateException("Similarity " + similarity + " return 0 for non-empty field");
}
}
norms.addValue(docState.docID, normValue);
}
termsHashPerField.finish();
}
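/** Inverts one field for one document: walks the token stream, validating positions and offsets, and feeds terms to the terms hash. first is true if this is the first time this field name appears in the document. */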
public void invert(IndexableField field, boolean first) throws IOException {
if (first) {
invertState.reset();
}
IndexableFieldType fieldType = field.fieldType();
IndexOptions indexOptions = fieldType.indexOptions();
fieldInfo.setIndexOptions(indexOptions);
if (fieldType.omitNorms()) {
fieldInfo.setOmitsNorms();
}
final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
boolean succeededInProcessingField = false;
try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) {
stream.reset();
invertState.setAttributeSource(stream);
termsHashPerField.start(field, first);
while (stream.incrementToken()) {
int posIncr = invertState.posIncrAttribute.getPositionIncrement();
invertState.position += posIncr;
if (invertState.position < invertState.lastPosition) {
if (posIncr == 0) {
throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
} else if (posIncr < 0) {
throw new IllegalArgumentException("position increment must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
} else {
throw new IllegalArgumentException("position overflowed Integer.MAX_VALUE (got posIncr=" + posIncr + " lastPosition=" + invertState.lastPosition + " position=" + invertState.position + ") for field '" + field.name() + "'");
}
} else if (invertState.position > IndexWriter.MAX_POSITION) {
throw new IllegalArgumentException("position " + invertState.position + " is too large for field '" + field.name() + "': max allowed position is " + IndexWriter.MAX_POSITION);
}
invertState.lastPosition = invertState.position;
if (posIncr == 0) {
invertState.numOverlap++;
}
int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
if (startOffset < invertState.lastStartOffset || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + invertState.lastStartOffset + " for field '" + field.name() + "'");
}
invertState.lastStartOffset = startOffset;
try {
invertState.length = Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
} catch (ArithmeticException ae) {
throw new IllegalArgumentException("too many tokens for field \"" + field.name() + "\"");
}
try {
termsHashPerField.add();
} catch (MaxBytesLengthExceededException e) {
byte[] prefix = new byte[30];
BytesRef bigTerm = invertState.termAttribute.getBytesRef();
System.arraycopy(bigTerm.bytes, bigTerm.offset, prefix, 0, 30);
String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + Arrays.toString(prefix) + "...', original message: " + e.getMessage();
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", "ERROR: " + msg);
}
throw new IllegalArgumentException(msg, e);
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
stream.end();
invertState.position += invertState.posIncrAttribute.getPositionIncrement();
invertState.offset += invertState.offsetAttribute.endOffset();
succeededInProcessingField = true;
} finally {
if (!succeededInProcessingField && docState.infoStream.isEnabled("DW")) {
docState.infoStream.message("DW", "An exception was thrown while processing field " + fieldInfo.name);
}
}
if (analyzed) {
invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
}
}
}
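/** Returns an iterator over the documents that have a buffered doc values value for this field, or null if there are none. */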
@Override
DocIdSetIterator getHasDocValues(String field) {
PerField perField = getPerField(field);
if (perField != null) {
if (perField.docValuesWriter != null) {
if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
return null;
}
return perField.docValuesWriter.getDocIdSet();
}
}
return null;
}
}