/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.utils;

import java.util.Arrays;

Simple class for constructing an EsimtatedHistogram from a set of predetermined values
/** * Simple class for constructing an EsimtatedHistogram from a set of predetermined values */
public class HistogramBuilder { public static final long[] EMPTY_LONG_ARRAY = new long[]{}; public static final long[] ZERO = new long[]{ 0 }; public HistogramBuilder() {} public HistogramBuilder(long[] values) { for (long value : values) { add(value); } } private long[] values = new long[10]; int count = 0; public void add(long value) { if (count == values.length) values = Arrays.copyOf(values, values.length << 1); values[count++] = value; }
Returns:buildWithStdevRangesAroundMean(3)
/** * See {@link #buildWithStdevRangesAroundMean(int)} * @return buildWithStdevRangesAroundMean(3) */
public EstimatedHistogram buildWithStdevRangesAroundMean() { return buildWithStdevRangesAroundMean(3); }
Calculate the min, mean, max and standard deviation of the items in the builder, and generate an EstimatedHistogram with upto maxdev stdev size ranges either side of the mean, until min/max are hit; if either min/max are not reached a further range is inserted at the relevant ends. e.g., with a maxdevs of 3, there may be up to 8 ranges (between 9 boundaries, the middle being the mean); the middle 6 will have the same size (stdev) with the outermost two stretching out to min and max.
Params:
  • maxdevs –
Returns:
/** * Calculate the min, mean, max and standard deviation of the items in the builder, and * generate an EstimatedHistogram with upto <code>maxdev</code> stdev size ranges either * side of the mean, until min/max are hit; if either min/max are not reached a further range is * inserted at the relevant ends. e.g., with a <code>maxdevs</code> of 3, there may be <i>up to</i> 8 ranges * (between 9 boundaries, the middle being the mean); the middle 6 will have the same size (stdev) * with the outermost two stretching out to min and max. * * @param maxdevs * @return */
public EstimatedHistogram buildWithStdevRangesAroundMean(int maxdevs) { if (maxdevs < 0) throw new IllegalArgumentException("maxdevs must be greater than or equal to zero"); final int count = this.count; final long[] values = this.values; if (count == 0) return new EstimatedHistogram(EMPTY_LONG_ARRAY, ZERO); long min = Long.MAX_VALUE, max = Long.MIN_VALUE; double sum = 0, sumsq = 0; for (int i = 0 ; i < count ; i++) { final long value = values[i]; sum += value; sumsq += value * value; if (value < min) min = value; if (value > max) max = value; } final long mean = Math.round(sum / count); final double stdev = Math.sqrt((sumsq / count) - (mean * (double) mean)); // build the ranges either side of the mean final long[] lowhalf = buildRange(mean, min, true, stdev, maxdevs); final long[] highhalf = buildRange(mean, max, false, stdev, maxdevs); // combine the ranges final long[] ranges = new long[lowhalf.length + highhalf.length + 1]; System.arraycopy(lowhalf, 0, ranges, 0, lowhalf.length); ranges[lowhalf.length] = mean; System.arraycopy(highhalf, 0, ranges, lowhalf.length + 1, highhalf.length); final EstimatedHistogram hist = new EstimatedHistogram(ranges, new long[ranges.length + 1]); for (int i = 0 ; i < count ; i++) hist.add(values[i]); return hist; } private static long[] buildRange(long mean, long minormax, boolean ismin, double stdev, int maxdevs) { if (minormax == mean) // minormax == mean we have no range to produce, but given the exclusive starts // that begin at zero by default (or -Inf) in EstimatedHistogram we have to generate a min range // to indicate where we start from return ismin ? new long[] { mean - 1 } : EMPTY_LONG_ARRAY; if (stdev < 1) { // deal with stdevs too small to generate sensible ranges return ismin ? new long[] { minormax - 1, mean - 1 } : new long[] { minormax }; } long larger, smaller; if (ismin) { larger = mean; smaller = minormax; } else { larger = minormax; smaller = mean; } double stdevsTo = (larger - smaller) / stdev; if (stdevsTo > 0 && stdevsTo < 1) // always round up if there's just one non-empty range stdevsTo = 1; else // otherwise round to the nearest half stdev, to avoid tiny ranges at the start/end stdevsTo = Math.round(stdevsTo); // limit to 4 stdev ranges - last range will contain everything to boundary final int len = Math.min(maxdevs + 1, (int) stdevsTo); final long[] range = new long[len]; long next = ismin ? minormax - 1 : minormax; for (int i = 0 ; i < range.length ; i++) { long delta = (range.length - (i + 1)) * (long) stdev; if (ismin) { range[i] = next; next = mean - delta; } else { range[len - 1 - i] = next; next = mean + delta; } } return range; } }