/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.codecs.uniformsplit;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;

Term of a block line.

Contains the term bytes and the minimal distinguishing prefix (MDP) length of this term.

The MDP is the minimal prefix that distinguishes a term from its immediate previous term (terms are alphabetically sorted).

The incremental encoding suffix is the suffix starting at the last byte of the MDP (inclusive).

Example: For the block

client
color
company
companies
"color" - MDP is "co" - incremental encoding suffix is "olor".
"company" - MDP is "com" - incremental encoding suffix is "mpany".
"companies" - MDP is "compani" - incremental encoding suffix is "ies".
@lucene.experimental
/** * Term of a block line. * <p> * Contains the term bytes and the minimal distinguishing prefix (MDP) length * of this term. * <p> * The MDP is the minimal prefix that distinguishes a term from its immediate * previous term (terms are alphabetically sorted). * <p> * The incremental encoding suffix is the suffix starting at the last byte of * the MDP (inclusive). * <p> * Example: * For the block * <pre> * client * color * company * companies * </pre> * "color" - MDP is "co" - incremental encoding suffix is "olor". * <br> * "company" - MDP is "com" - incremental encoding suffix is "mpany". * <br> * "companies" - MDP is "compani" - incremental encoding suffix is "ies". * * @lucene.experimental */
public class TermBytes implements Accountable { private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(TermBytes.class); protected int mdpLength; protected BytesRef term; public TermBytes(int mdpLength, BytesRef term) { reset(mdpLength, term); } public TermBytes reset(int mdpLength, BytesRef term) { assert term.length > 0 && mdpLength > 0 || term.length == 0 && mdpLength == 0 : "Inconsistent mdpLength=" + mdpLength + ", term.length=" + term.length; assert term.length == 0 || mdpLength <= term.length : "Too large mdpLength=" + mdpLength + ", term.length=" + term.length; assert term.offset == 0; this.mdpLength = mdpLength; this.term = term; return this; }
See Also:
Returns:This term MDP length.
/** * @return This term MDP length. * @see TermBytes */
public int getMdpLength() { return mdpLength; }
Returns:This term bytes.
/** * @return This term bytes. */
public BytesRef getTerm() { return term; }
See Also:
Returns:The offset of this term incremental encoding suffix.
/** * @return The offset of this term incremental encoding suffix. * @see TermBytes */
public int getSuffixOffset() { return Math.max(mdpLength - 1, 0); }
See Also:
Returns:The length of this term incremental encoding suffix.
/** * @return The length of this term incremental encoding suffix. * @see TermBytes */
public int getSuffixLength() { return term.length - getSuffixOffset(); }
Computes the length of the minimal distinguishing prefix (MDP) between a current term and its previous term (terms are alphabetically sorted).

Example: If previous="car" and current="cartridge", then MDP length is 4. It is the length of the minimal prefix distinguishing "cartridge" from "car", that is, the length of "cart".

See Also:
  • TermBytes
/** * Computes the length of the minimal distinguishing prefix (MDP) between * a current term and its previous term (terms are alphabetically sorted). * <p> * Example: If previous="car" and current="cartridge", then MDP length is * 4. It is the length of the minimal prefix distinguishing "cartridge" from * "car", that is, the length of "cart". * * @see TermBytes */
public static int computeMdpLength(BytesRef previousTerm, BytesRef currentTerm) { int mdpLength = previousTerm == null ? 1 : StringHelper.sortKeyLength(previousTerm, currentTerm); return Math.min(mdpLength, currentTerm.length); } @Override public long ramBytesUsed() { return BASE_RAM_USAGE + RamUsageUtil.ramBytesUsed(term); } }