/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.blocktree;


import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;

import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

BlockTree statistics for a single field returned by FieldReader.getStats().
@lucene.internal
/** * BlockTree statistics for a single field * returned by {@link FieldReader#getStats()}. * @lucene.internal */
public class Stats {
Byte size of the index.
/** Byte size of the index. */
public long indexNumBytes;
Total number of terms in the field.
/** Total number of terms in the field. */
public long totalTermCount;
Total number of bytes (sum of term lengths) across all terms in the field.
/** Total number of bytes (sum of term lengths) across all terms in the field. */
public long totalTermBytes;
The number of normal (non-floor) blocks in the terms file.
/** The number of normal (non-floor) blocks in the terms file. */
public int nonFloorBlockCount;
The number of floor blocks (meta-blocks larger than the allowed maxItemsPerBlock) in the terms file.
/** The number of floor blocks (meta-blocks larger than the * allowed {@code maxItemsPerBlock}) in the terms file. */
public int floorBlockCount;
The number of sub-blocks within the floor blocks.
/** The number of sub-blocks within the floor blocks. */
public int floorSubBlockCount;
The number of "internal" blocks (that have both terms and sub-blocks).
/** The number of "internal" blocks (that have both * terms and sub-blocks). */
public int mixedBlockCount;
The number of "leaf" blocks (blocks that have only terms).
/** The number of "leaf" blocks (blocks that have only * terms). */
public int termsOnlyBlockCount;
The number of "internal" blocks that do not contain terms (have only sub-blocks).
/** The number of "internal" blocks that do not contain * terms (have only sub-blocks). */
public int subBlocksOnlyBlockCount;
Total number of blocks.
/** Total number of blocks. */
public int totalBlockCount;
Number of blocks at each prefix depth.
/** Number of blocks at each prefix depth. */
public int[] blockCountByPrefixLen = new int[10]; private int startBlockCount; private int endBlockCount;
Total number of bytes used to store term suffixes.
/** Total number of bytes used to store term suffixes. */
public long totalBlockSuffixBytes;
Number of times each compression method has been used. 0 = uncompressed 1 = lowercase_ascii 2 = LZ4
/** * Number of times each compression method has been used. * 0 = uncompressed * 1 = lowercase_ascii * 2 = LZ4 */
public final long[] compressionAlgorithms = new long[3];
Total number of suffix bytes before compression.
/** Total number of suffix bytes before compression. */
public long totalUncompressedBlockSuffixBytes;
Total number of bytes used to store term stats (not including what the PostingsReaderBase stores.
/** Total number of bytes used to store term stats (not * including what the {@link PostingsReaderBase} * stores. */
public long totalBlockStatsBytes;
Total bytes stored by the PostingsReaderBase, plus the other few vInts stored in the frame.
/** Total bytes stored by the {@link PostingsReaderBase}, * plus the other few vInts stored in the frame. */
public long totalBlockOtherBytes;
Segment name.
/** Segment name. */
public final String segment;
Field name.
/** Field name. */
public final String field; Stats(String segment, String field) { this.segment = segment; this.field = field; } void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) { totalBlockCount++; if (isFloor) { if (frame.fp == frame.fpOrig) { floorBlockCount++; } floorSubBlockCount++; } else { nonFloorBlockCount++; } if (blockCountByPrefixLen.length <= frame.prefix) { blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix); } blockCountByPrefixLen[frame.prefix]++; startBlockCount++; totalBlockSuffixBytes += frame.totalSuffixBytes; totalUncompressedBlockSuffixBytes += frame.suffixesReader.length(); if (frame.suffixesReader != frame.suffixLengthsReader) { totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length(); } totalBlockStatsBytes += frame.statsReader.length(); compressionAlgorithms[frame.compressionAlg.code]++; } void endBlock(SegmentTermsEnumFrame frame) { final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd; final int subBlockCount = frame.entCount - termCount; totalTermCount += termCount; if (termCount != 0 && subBlockCount != 0) { mixedBlockCount++; } else if (termCount != 0) { termsOnlyBlockCount++; } else if (subBlockCount != 0) { subBlocksOnlyBlockCount++; } else { throw new IllegalStateException(); } endBlockCount++; final long otherBytes = frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length(); assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd; totalBlockOtherBytes += otherBytes; } void term(BytesRef term) { totalTermBytes += term.length; } void finish() { assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount; assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount; assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount; } @Override public String toString() { final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); PrintStream out; try { out = new PrintStream(bos, false, IOUtils.UTF_8); } catch (UnsupportedEncodingException bogus) { throw new RuntimeException(bogus); } out.println(" index FST:"); out.println(" " + indexNumBytes + " bytes"); out.println(" terms:"); out.println(" " + totalTermCount + " terms"); out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); out.println(" blocks:"); out.println(" " + totalBlockCount + " blocks"); out.println(" " + termsOnlyBlockCount + " terms-only blocks"); out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks"); out.println(" " + mixedBlockCount + " mixed blocks"); out.println(" " + floorBlockCount + " floor blocks"); out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks"); out.println(" " + floorSubBlockCount + " floor sub-blocks"); out.println(" " + totalUncompressedBlockSuffixBytes + " term suffix bytes before compression" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); StringBuilder compressionCounts = new StringBuilder(); for (int code = 0; code < compressionAlgorithms.length; ++code) { if (compressionAlgorithms[code] == 0) { continue; } if (compressionCounts.length() > 0) { compressionCounts.append(", "); } compressionCounts.append(CompressionAlgorithm.byCode(code)); compressionCounts.append(": "); compressionCounts.append(compressionAlgorithms[code]); } out.println(" " + totalBlockSuffixBytes + " compressed term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.2f", ((double) totalBlockSuffixBytes)/totalUncompressedBlockSuffixBytes) + " compression ratio - compression count by algorithm: " + compressionCounts : "") + ")"); out.println(" " + totalBlockStatsBytes + " term stats bytes " + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); if (totalBlockCount != 0) { out.println(" by prefix length:"); int total = 0; for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) { final int blockCount = blockCountByPrefixLen[prefix]; total += blockCount; if (blockCount != 0) { out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount); } } assert totalBlockCount == total; } try { return bos.toString(IOUtils.UTF_8); } catch (UnsupportedEncodingException bogus) { throw new RuntimeException(bogus); } } }