org.apache.lucene/lucene-analyzers-common/8.2.0 : org/apache/lucene/collation/CollationDocValuesField.java

CollationDocValuesField

http://lucene.apache.org/lucene-parent/lucene-analyzers-common: Additional Analyzers (The Apache Software Foundation)

Apache 2

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.collation;


import java.text.Collator;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BytesRef;

/**
 * Indexes collation keys as a single-valued {@link SortedDocValuesField}.
 * <p>
 * This is more efficient than {@link CollationKeyAnalyzer} if the field
 * only has one value: no uninversion is necessary to sort on the field,
 * locale-sensitive range queries can still work via {@code DocValuesRangeQuery},
 * and the underlying data structures built at index-time are likely more efficient
 * and use less memory than FieldCache.
 */
public final class CollationDocValuesField extends Field {
  private final String name;
  private final Collator collator;
  private final BytesRef bytes = new BytesRef();
  
  Create a new ICUCollationDocValuesField.
 NOTE: you should not create a new one for each document, instead just make one and reuse it during your indexing process, setting the value via setStringValue(String). 
Params: name – field name
collator – Collator for generating collation keys./**
   * Create a new ICUCollationDocValuesField.
   * <p>
   * NOTE: you should not create a new one for each document, instead
   * just make one and reuse it during your indexing process, setting
   * the value via {@link #setStringValue(String)}.
   * @param name field name
   * @param collator Collator for generating collation keys.
   */
  // TODO: can we make this trap-free? maybe just synchronize on the collator
  // instead? 
  public CollationDocValuesField(String name, Collator collator) {
    super(name, SortedDocValuesField.TYPE);
    this.name = name;
    this.collator = (Collator) collator.clone();
    fieldsData = bytes; // so wrong setters cannot be called
  }

  @Override
  public String name() {
    return name;
  }
  
  @Override
  public void setStringValue(String value) {
    bytes.bytes = collator.getCollationKey(value).toByteArray();
    bytes.offset = 0;
    bytes.length = bytes.bytes.length;
  }
}

/

org.apache.lucene/ lucene-analyzers-common/ 8.2.0/ org/apache/lucene/collation/CollationDocValuesField.java