/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.cassandra.utils.btree;

import java.util.*;
import java.util.function.Consumer;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Ordering;

import org.apache.cassandra.utils.ObjectSizes;

import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.filter;
import static com.google.common.collect.Iterables.transform;
import static java.lang.Math.max;
import static java.lang.Math.min;

public class BTree
{
    
Leaf Nodes are a raw array of values: Object[V1, V1, ...,]. Branch Nodes: Object[V1, V2, ..., child[<V1.key], child[<V2.key], ..., child[< Inf], size], where each child is another node, i.e., an Object[]. Thus, the value elements in a branch node are the first half of the array (minus one). In our implementation, each value must include its own key; we access these via Comparator, rather than directly. So we can quickly distinguish between leaves and branches, we require that leaf nodes are always an odd number of elements (padded with a null, if necessary), and branches are always an even number of elements. BTrees are immutable; updating one returns a new tree that reuses unmodified nodes. There are no references back to a parent node from its children. (This would make it impossible to re-use subtrees when modifying the tree, since the modified tree would need new parent references.) Instead, we store these references in a Path as needed when navigating the tree.
/** * Leaf Nodes are a raw array of values: Object[V1, V1, ...,]. * * Branch Nodes: Object[V1, V2, ..., child[&lt;V1.key], child[&lt;V2.key], ..., child[&lt; Inf], size], where * each child is another node, i.e., an Object[]. Thus, the value elements in a branch node are the * first half of the array (minus one). In our implementation, each value must include its own key; * we access these via Comparator, rather than directly. * * So we can quickly distinguish between leaves and branches, we require that leaf nodes are always an odd number * of elements (padded with a null, if necessary), and branches are always an even number of elements. * * BTrees are immutable; updating one returns a new tree that reuses unmodified nodes. * * There are no references back to a parent node from its children. (This would make it impossible to re-use * subtrees when modifying the tree, since the modified tree would need new parent references.) * Instead, we store these references in a Path as needed when navigating the tree. */
// The maximum fan factor used for B-Trees static final int FAN_SHIFT; static { int fanfactor = 32; if (System.getProperty("cassandra.btree.fanfactor") != null) fanfactor = Integer.parseInt(System.getProperty("cassandra.btree.fanfactor")); int shift = 1; while (1 << shift < fanfactor) shift += 1; FAN_SHIFT = shift; } // NB we encode Path indexes as Bytes, so this needs to be less than Byte.MAX_VALUE / 2 static final int FAN_FACTOR = 1 << FAN_SHIFT; static final int MINIMAL_NODE_SIZE = FAN_FACTOR >> 1; // An empty BTree Leaf - which is the same as an empty BTree static final Object[] EMPTY_LEAF = new Object[1]; // An empty BTree branch - used only for internal purposes in Modifier static final Object[] EMPTY_BRANCH = new Object[] { null, new int[0] }; // direction of iteration public static enum Dir { ASC, DESC; public Dir invert() { return this == ASC ? DESC : ASC; } public static Dir asc(boolean asc) { return asc ? ASC : DESC; } public static Dir desc(boolean desc) { return desc ? DESC : ASC; } } public static Object[] empty() { return EMPTY_LEAF; } public static Object[] singleton(Object value) { return new Object[] { value }; } public static <C, K extends C, V extends C> Object[] build(Collection<K> source, UpdateFunction<K, V> updateF) { return buildInternal(source, source.size(), updateF); } public static <C, K extends C, V extends C> Object[] build(Iterable<K> source, UpdateFunction<K, V> updateF) { return buildInternal(source, -1, updateF); }
Creates a BTree containing all of the objects in the provided collection
Params:
  • source – the items to build the tree with. MUST BE IN STRICTLY ASCENDING ORDER.
  • size – the size of the source iterable
Returns: a btree representing the contents of the provided iterable
/** * Creates a BTree containing all of the objects in the provided collection * * @param source the items to build the tree with. MUST BE IN STRICTLY ASCENDING ORDER. * @param size the size of the source iterable * @return a btree representing the contents of the provided iterable */
public static <C, K extends C, V extends C> Object[] build(Iterable<K> source, int size, UpdateFunction<K, V> updateF) { if (size < 0) throw new IllegalArgumentException(Integer.toString(size)); return buildInternal(source, size, updateF); }
As build(), except:
Params:
  • size – < 0 if size is unknown
/** * As build(), except: * @param size < 0 if size is unknown */
private static <C, K extends C, V extends C> Object[] buildInternal(Iterable<K> source, int size, UpdateFunction<K, V> updateF) { if ((size >= 0) & (size < FAN_FACTOR)) { if (size == 0) return EMPTY_LEAF; // pad to odd length to match contract that all leaf nodes are odd V[] values = (V[]) new Object[size | 1]; { int i = 0; for (K k : source) values[i++] = updateF.apply(k); } if (updateF != UpdateFunction.noOp()) updateF.allocated(ObjectSizes.sizeOfArray(values)); return values; } TreeBuilder builder = TreeBuilder.newInstance(); Object[] btree = builder.build(source, updateF, size); return btree; } public static <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Collection<K> updateWith, UpdateFunction<K, V> updateF) { return update(btree, comparator, updateWith, updateWith.size(), updateF); }
Returns a new BTree with the provided collection inserting/replacing as necessary any equal items
Params:
  • btree – the tree to update
  • comparator – the comparator that defines the ordering over the items in the tree
  • updateWith – the items to either insert / update. MUST BE IN STRICTLY ASCENDING ORDER.
  • updateWithLength – then number of elements in updateWith
  • updateF – the update function to apply to any pairs we are swapping, and maybe abort early
Type parameters:
  • <V> –
Returns:
/** * Returns a new BTree with the provided collection inserting/replacing as necessary any equal items * * @param btree the tree to update * @param comparator the comparator that defines the ordering over the items in the tree * @param updateWith the items to either insert / update. MUST BE IN STRICTLY ASCENDING ORDER. * @param updateWithLength then number of elements in updateWith * @param updateF the update function to apply to any pairs we are swapping, and maybe abort early * @param <V> * @return */
public static <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Iterable<K> updateWith, int updateWithLength, UpdateFunction<K, V> updateF) { if (isEmpty(btree)) return build(updateWith, updateWithLength, updateF); TreeBuilder builder = TreeBuilder.newInstance(); btree = builder.update(btree, comparator, updateWith, updateF); return btree; } public static <K> Object[] merge(Object[] tree1, Object[] tree2, Comparator<? super K> comparator, UpdateFunction<K, K> updateF) { if (size(tree1) < size(tree2)) { Object[] tmp = tree1; tree1 = tree2; tree2 = tmp; } return update(tree1, comparator, new BTreeSet<>(tree2, comparator), updateF); } public static <V> Iterator<V> iterator(Object[] btree) { return iterator(btree, Dir.ASC); } public static <V> Iterator<V> iterator(Object[] btree, Dir dir) { return new BTreeSearchIterator<>(btree, null, dir); } public static <V> Iterator<V> iterator(Object[] btree, int lb, int ub, Dir dir) { return new BTreeSearchIterator<>(btree, null, dir, lb, ub); } public static <V> Iterable<V> iterable(Object[] btree) { return iterable(btree, Dir.ASC); } public static <V> Iterable<V> iterable(Object[] btree, Dir dir) { return () -> iterator(btree, dir); } public static <V> Iterable<V> iterable(Object[] btree, int lb, int ub, Dir dir) { return () -> iterator(btree, lb, ub, dir); }
Returns an Iterator over the entire tree
Params:
  • btree – the tree to iterate over
  • dir – direction of iteration
Type parameters:
  • <V> –
Returns:
/** * Returns an Iterator over the entire tree * * @param btree the tree to iterate over * @param dir direction of iteration * @param <V> * @return */
public static <K, V> BTreeSearchIterator<K, V> slice(Object[] btree, Comparator<? super K> comparator, Dir dir) { return new BTreeSearchIterator<>(btree, comparator, dir); }
Params:
  • btree – the tree to iterate over
  • comparator – the comparator that defines the ordering over the items in the tree
  • start – the beginning of the range to return, inclusive (in ascending order)
  • end – the end of the range to return, exclusive (in ascending order)
  • dir – if false, the iterator will start at the last item and move backwards
Returns: an Iterator over the defined sub-range of the tree
/** * @param btree the tree to iterate over * @param comparator the comparator that defines the ordering over the items in the tree * @param start the beginning of the range to return, inclusive (in ascending order) * @param end the end of the range to return, exclusive (in ascending order) * @param dir if false, the iterator will start at the last item and move backwards * @return an Iterator over the defined sub-range of the tree */
public static <K, V extends K> BTreeSearchIterator<K, V> slice(Object[] btree, Comparator<? super K> comparator, K start, K end, Dir dir) { return slice(btree, comparator, start, true, end, false, dir); }
Params:
  • btree – the tree to iterate over
  • comparator – the comparator that defines the ordering over the items in the tree
  • start – low bound of the range
  • startInclusive – inclusivity of lower bound
  • end – high bound of the range
  • endInclusive – inclusivity of higher bound
  • dir – direction of iteration
Returns: an Iterator over the defined sub-range of the tree
/** * @param btree the tree to iterate over * @param comparator the comparator that defines the ordering over the items in the tree * @param start low bound of the range * @param startInclusive inclusivity of lower bound * @param end high bound of the range * @param endInclusive inclusivity of higher bound * @param dir direction of iteration * @return an Iterator over the defined sub-range of the tree */
public static <K, V extends K> BTreeSearchIterator<K, V> slice(Object[] btree, Comparator<? super K> comparator, K start, boolean startInclusive, K end, boolean endInclusive, Dir dir) { int inclusiveLowerBound = max(0, start == null ? Integer.MIN_VALUE : startInclusive ? ceilIndex(btree, comparator, start) : higherIndex(btree, comparator, start)); int inclusiveUpperBound = min(size(btree) - 1, end == null ? Integer.MAX_VALUE : endInclusive ? floorIndex(btree, comparator, end) : lowerIndex(btree, comparator, end)); return new BTreeSearchIterator<>(btree, comparator, dir, inclusiveLowerBound, inclusiveUpperBound); }
Returns:the item in the tree that sorts as equal to the search argument, or null if no such item
/** * @return the item in the tree that sorts as equal to the search argument, or null if no such item */
public static <V> V find(Object[] node, Comparator<? super V> comparator, V find) { while (true) { int keyEnd = getKeyEnd(node); int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator); if (i >= 0) return (V) node[i]; if (isLeaf(node)) return null; i = -1 - i; node = (Object[]) node[keyEnd + i]; } }
Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable. Finds and replaces the item provided by index in the tree.
/** * Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable. * Finds and replaces the item provided by index in the tree. */
public static <V> void replaceInSitu(Object[] tree, int index, V replace) { // WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation if ((index < 0) | (index >= size(tree))) throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")"); while (!isLeaf(tree)) { final int[] sizeMap = getSizeMap(tree); int boundary = Arrays.binarySearch(sizeMap, index); if (boundary >= 0) { // exact match, in this branch node assert boundary < sizeMap.length - 1; tree[boundary] = replace; return; } boundary = -1 -boundary; if (boundary > 0) { assert boundary < sizeMap.length; index -= (1 + sizeMap[boundary - 1]); } tree = (Object[]) tree[getChildStart(tree) + boundary]; } assert index < getLeafKeyEnd(tree); tree[index] = replace; }
Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable. Finds and replaces the provided item in the tree. Both should sort as equal to each other (although this is not enforced)
/** * Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable. * Finds and replaces the provided item in the tree. Both should sort as equal to each other (although this is not enforced) */
public static <V> void replaceInSitu(Object[] node, Comparator<? super V> comparator, V find, V replace) { while (true) { int keyEnd = getKeyEnd(node); int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator); if (i >= 0) { assert find == node[i]; node[i] = replace; return; } if (isLeaf(node)) throw new NoSuchElementException(); i = -1 - i; node = (Object[]) node[keyEnd + i]; } }
Honours result semantics of Arrays.binarySearch, as though it were performed on the tree flattened into an array
Returns:index of item in tree, or (-(insertion point) - 1) if not present
/** * Honours result semantics of {@link Arrays#binarySearch}, as though it were performed on the tree flattened into an array * @return index of item in tree, or <tt>(-(<i>insertion point</i>) - 1)</tt> if not present */
public static <V> int findIndex(Object[] node, Comparator<? super V> comparator, V find) { int lb = 0; while (true) { int keyEnd = getKeyEnd(node); int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator); boolean exact = i >= 0; if (isLeaf(node)) return exact ? lb + i : i - lb; if (!exact) i = -1 - i; int[] sizeMap = getSizeMap(node); if (exact) return lb + sizeMap[i]; else if (i > 0) lb += sizeMap[i - 1] + 1; node = (Object[]) node[keyEnd + i]; } }
Returns:the value at the index'th position in the tree, in tree order
/** * @return the value at the index'th position in the tree, in tree order */
public static <V> V findByIndex(Object[] tree, int index) { // WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation if ((index < 0) | (index >= size(tree))) throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")"); Object[] node = tree; while (true) { if (isLeaf(node)) { int keyEnd = getLeafKeyEnd(node); assert index < keyEnd; return (V) node[index]; } int[] sizeMap = getSizeMap(node); int boundary = Arrays.binarySearch(sizeMap, index); if (boundary >= 0) { // exact match, in this branch node assert boundary < sizeMap.length - 1; return (V) node[boundary]; } boundary = -1 -boundary; if (boundary > 0) { assert boundary < sizeMap.length; index -= (1 + sizeMap[boundary - 1]); } node = (Object[]) node[getChildStart(node) + boundary]; } } /* since we have access to binarySearch semantics within indexOf(), we can use this to implement * lower/upper/floor/higher very trivially * * this implementation is *not* optimal; it requires two logarithmic traversals, although the second is much cheaper * (having less height, and operating over only primitive arrays), and the clarity is compelling */ public static <V> int lowerIndex(Object[] btree, Comparator<? super V> comparator, V find) { int i = findIndex(btree, comparator, find); if (i < 0) i = -1 -i; return i - 1; } public static <V> V lower(Object[] btree, Comparator<? super V> comparator, V find) { int i = lowerIndex(btree, comparator, find); return i >= 0 ? findByIndex(btree, i) : null; } public static <V> int floorIndex(Object[] btree, Comparator<? super V> comparator, V find) { int i = findIndex(btree, comparator, find); if (i < 0) i = -2 -i; return i; } public static <V> V floor(Object[] btree, Comparator<? super V> comparator, V find) { int i = floorIndex(btree, comparator, find); return i >= 0 ? findByIndex(btree, i) : null; } public static <V> int higherIndex(Object[] btree, Comparator<? super V> comparator, V find) { int i = findIndex(btree, comparator, find); if (i < 0) i = -1 -i; else i++; return i; } public static <V> V higher(Object[] btree, Comparator<? super V> comparator, V find) { int i = higherIndex(btree, comparator, find); return i < size(btree) ? findByIndex(btree, i) : null; } public static <V> int ceilIndex(Object[] btree, Comparator<? super V> comparator, V find) { int i = findIndex(btree, comparator, find); if (i < 0) i = -1 -i; return i; } public static <V> V ceil(Object[] btree, Comparator<? super V> comparator, V find) { int i = ceilIndex(btree, comparator, find); return i < size(btree) ? findByIndex(btree, i) : null; } // UTILITY METHODS // get the upper bound we should search in for keys in the node static int getKeyEnd(Object[] node) { if (isLeaf(node)) return getLeafKeyEnd(node); else return getBranchKeyEnd(node); } // get the last index that is non-null in the leaf node static int getLeafKeyEnd(Object[] node) { int len = node.length; return node[len - 1] == null ? len - 1 : len; } // return the boundary position between keys/children for the branch node // == number of keys, as they are indexed from zero static int getBranchKeyEnd(Object[] branchNode) { return (branchNode.length / 2) - 1; }
Returns:first index in a branch node containing child nodes
/** * @return first index in a branch node containing child nodes */
static int getChildStart(Object[] branchNode) { return getBranchKeyEnd(branchNode); }
Returns:last index + 1 in a branch node containing child nodes
/** * @return last index + 1 in a branch node containing child nodes */
static int getChildEnd(Object[] branchNode) { return branchNode.length - 1; }
Returns:number of children in a branch node
/** * @return number of children in a branch node */
static int getChildCount(Object[] branchNode) { return branchNode.length / 2; }
Returns:the size map for the branch node
/** * @return the size map for the branch node */
static int[] getSizeMap(Object[] branchNode) { return (int[]) branchNode[getChildEnd(branchNode)]; }
Returns:the size map for the branch node
/** * @return the size map for the branch node */
static int lookupSizeMap(Object[] branchNode, int index) { return getSizeMap(branchNode)[index]; } // get the size from the btree's index (fails if not present) public static int size(Object[] tree) { if (isLeaf(tree)) return getLeafKeyEnd(tree); int length = tree.length; // length - 1 == getChildEnd == getPositionOfSizeMap // (length / 2) - 1 == getChildCount - 1 == position of full tree size // hard code this, as will be used often; return ((int[]) tree[length - 1])[(length / 2) - 1]; } public static long sizeOfStructureOnHeap(Object[] tree) { long size = ObjectSizes.sizeOfArray(tree); if (isLeaf(tree)) return size; for (int i = getChildStart(tree) ; i < getChildEnd(tree) ; i++) size += sizeOfStructureOnHeap((Object[]) tree[i]); return size; } // returns true if the provided node is a leaf, false if it is a branch static boolean isLeaf(Object[] node) { return (node.length & 1) == 1; } public static boolean isEmpty(Object[] tree) { return tree == EMPTY_LEAF; } public static int depth(Object[] tree) { int depth = 1; while (!isLeaf(tree)) { depth++; tree = (Object[]) tree[getKeyEnd(tree)]; } return depth; }
Fill the target array with the contents of the provided subtree, in ascending order, starting at targetOffset
Params:
  • tree – source
  • target – array
  • targetOffset – offset in target array
Returns:number of items copied (size of tree)
/** * Fill the target array with the contents of the provided subtree, in ascending order, starting at targetOffset * @param tree source * @param target array * @param targetOffset offset in target array * @return number of items copied (size of tree) */
public static int toArray(Object[] tree, Object[] target, int targetOffset) { return toArray(tree, 0, size(tree), target, targetOffset); } public static int toArray(Object[] tree, int treeStart, int treeEnd, Object[] target, int targetOffset) { if (isLeaf(tree)) { int count = treeEnd - treeStart; System.arraycopy(tree, treeStart, target, targetOffset, count); return count; } int newTargetOffset = targetOffset; int childCount = getChildCount(tree); int childOffset = getChildStart(tree); for (int i = 0 ; i < childCount ; i++) { int childStart = treeIndexOffsetOfChild(tree, i); int childEnd = treeIndexOfBranchKey(tree, i); if (childStart <= treeEnd && childEnd >= treeStart) { newTargetOffset += toArray((Object[]) tree[childOffset + i], max(0, treeStart - childStart), min(childEnd, treeEnd) - childStart, target, newTargetOffset); if (treeStart <= childEnd && treeEnd > childEnd) // this check will always fail for the non-existent key target[newTargetOffset++] = tree[i]; } } return newTargetOffset - targetOffset; } // simple class for avoiding duplicate transformation work private static class FiltrationTracker<V> implements Function<V, V> { final Function<? super V, ? extends V> wrapped; int index; boolean failed; private FiltrationTracker(Function<? super V, ? extends V> wrapped) { this.wrapped = wrapped; } public V apply(V i) { V o = wrapped.apply(i); if (o != null) index++; else failed = true; return o; } }
Takes a btree and transforms it using the provided function, filtering out any null results. The result of any transformation must sort identically wrt the other results as their originals
/** * Takes a btree and transforms it using the provided function, filtering out any null results. * The result of any transformation must sort identically wrt the other results as their originals */
public static <V> Object[] transformAndFilter(Object[] btree, Function<? super V, ? extends V> function) { if (isEmpty(btree)) return btree; // TODO: can be made more efficient FiltrationTracker<V> wrapped = new FiltrationTracker<>(function); Object[] result = transformAndFilter(btree, wrapped); if (!wrapped.failed) return result; // take the already transformed bits from the head of the partial result Iterable<V> head = iterable(result, 0, wrapped.index - 1, Dir.ASC); // and concatenate with remainder of original tree, with transformation applied Iterable<V> remainder = iterable(btree, wrapped.index + 1, size(btree) - 1, Dir.ASC); remainder = filter(transform(remainder, function), (x) -> x != null); Iterable<V> build = concat(head, remainder); return buildInternal(build, -1, UpdateFunction.<V>noOp()); } private static <V> Object[] transformAndFilter(Object[] btree, FiltrationTracker<V> function) { Object[] result = btree; boolean isLeaf = isLeaf(btree); int childOffset = isLeaf ? Integer.MAX_VALUE : getChildStart(btree); int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1; for (int i = 0 ; i < limit ; i++) { // we want to visit in iteration order, so we visit our key nodes inbetween our children int idx = isLeaf ? i : (i / 2) + (i % 2 == 0 ? childOffset : 0); Object current = btree[idx]; Object updated = idx < childOffset ? function.apply((V) current) : transformAndFilter((Object[]) current, function); if (updated != current) { if (result == btree) result = btree.clone(); result[idx] = updated; } if (function.failed) return result; } return result; } public static boolean equals(Object[] a, Object[] b) { return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b)); } public static int hashCode(Object[] btree) { // we can't just delegate to Arrays.deepHashCode(), // because two equivalent trees may be represented by differently shaped trees int result = 1; for (Object v : iterable(btree)) result = 31 * result + Objects.hashCode(v); return result; }
tree index => index of key wrt all items in the tree laid out serially This version of the method permits requesting out-of-bounds indexes, -1 and size
Params:
  • root – to calculate tree index within
  • keyIndex – root-local index of key to calculate tree-index
Returns:the number of items preceding the key in the whole tree of root
/** * tree index => index of key wrt all items in the tree laid out serially * * This version of the method permits requesting out-of-bounds indexes, -1 and size * @param root to calculate tree index within * @param keyIndex root-local index of key to calculate tree-index * @return the number of items preceding the key in the whole tree of root */
public static int treeIndexOfKey(Object[] root, int keyIndex) { if (isLeaf(root)) return keyIndex; int[] sizeMap = getSizeMap(root); if ((keyIndex >= 0) & (keyIndex < sizeMap.length)) return sizeMap[keyIndex]; // we support asking for -1 or size, so that we can easily use this for iterator bounds checking if (keyIndex < 0) return -1; return sizeMap[keyIndex - 1] + 1; }
Params:
  • keyIndex – node-local index of the key to calculate index of
Returns:keyIndex; this method is here only for symmetry and clarity
/** * @param keyIndex node-local index of the key to calculate index of * @return keyIndex; this method is here only for symmetry and clarity */
public static int treeIndexOfLeafKey(int keyIndex) { return keyIndex; }
Params:
  • root – to calculate tree-index within
  • keyIndex – root-local index of key to calculate tree-index of
Returns:the number of items preceding the key in the whole tree of root
/** * @param root to calculate tree-index within * @param keyIndex root-local index of key to calculate tree-index of * @return the number of items preceding the key in the whole tree of root */
public static int treeIndexOfBranchKey(Object[] root, int keyIndex) { return lookupSizeMap(root, keyIndex); }
Params:
  • root – to calculate tree-index within
  • childIndex – root-local index of *child* to calculate tree-index of
Returns:the number of items preceding the child in the whole tree of root
/** * @param root to calculate tree-index within * @param childIndex root-local index of *child* to calculate tree-index of * @return the number of items preceding the child in the whole tree of root */
public static int treeIndexOffsetOfChild(Object[] root, int childIndex) { if (childIndex == 0) return 0; return 1 + lookupSizeMap(root, childIndex - 1); } public static <V> Builder<V> builder(Comparator<? super V> comparator) { return new Builder<>(comparator); } public static <V> Builder<V> builder(Comparator<? super V> comparator, int initialCapacity) { return new Builder<>(comparator, initialCapacity); } public static class Builder<V> { // a user-defined bulk resolution, to be applied manually via resolve() public static interface Resolver { // can return a different output type to input, so long as sort order is maintained // if a resolver is present, this method will be called for every sequence of equal inputs // even those with only one item Object resolve(Object[] array, int lb, int ub); } // a user-defined resolver that is applied automatically on encountering two duplicate values public static interface QuickResolver<V> { // can return a different output type to input, so long as sort order is maintained // if a resolver is present, this method will be called for every sequence of equal inputs // even those with only one item V resolve(V a, V b); } Comparator<? super V> comparator; Object[] values; int count; boolean detected = true; // true if we have managed to cheaply ensure sorted (+ filtered, if resolver == null) as we have added boolean auto = true; // false if the user has promised to enforce the sort order and resolve any duplicates QuickResolver<V> quickResolver; protected Builder(Comparator<? super V> comparator) { this(comparator, 16); } protected Builder(Comparator<? super V> comparator, int initialCapacity) { if (initialCapacity == 0) initialCapacity = 16; this.comparator = comparator; this.values = new Object[initialCapacity]; } @VisibleForTesting public Builder() { this.values = new Object[16]; } private Builder(Builder<V> builder) { this.comparator = builder.comparator; this.values = Arrays.copyOf(builder.values, builder.values.length); this.count = builder.count; this.detected = builder.detected; this.auto = builder.auto; this.quickResolver = builder.quickResolver; }
Creates a copy of this Builder.
Returns:a copy of this Builder.
/** * Creates a copy of this {@code Builder}. * @return a copy of this {@code Builder}. */
public Builder<V> copy() { return new Builder<>(this); } public Builder<V> setQuickResolver(QuickResolver<V> quickResolver) { this.quickResolver = quickResolver; return this; } public void reuse() { reuse(comparator); } public void reuse(Comparator<? super V> comparator) { this.comparator = comparator; Arrays.fill(values, null); count = 0; detected = true; } public Builder<V> auto(boolean auto) { this.auto = auto; return this; } public Builder<V> add(V v) { if (count == values.length) values = Arrays.copyOf(values, count * 2); Object[] values = this.values; int prevCount = this.count++; values[prevCount] = v; if (auto && detected && prevCount > 0) { V prev = (V) values[prevCount - 1]; int c = comparator.compare(prev, v); if (c == 0 && auto) { count = prevCount; if (quickResolver != null) values[prevCount - 1] = quickResolver.resolve(prev, v); } else if (c > 0) { detected = false; } } return this; } public Builder<V> addAll(Collection<V> add) { if (auto && add instanceof SortedSet && equalComparators(comparator, ((SortedSet) add).comparator())) { // if we're a SortedSet, permit quick order-preserving addition of items // if we collect all duplicates, don't bother as merge will necessarily be more expensive than sorting at end return mergeAll(add, add.size()); } detected = false; if (values.length < count + add.size()) values = Arrays.copyOf(values, max(count + add.size(), count * 2)); for (V v : add) values[count++] = v; return this; } private static boolean equalComparators(Comparator<?> a, Comparator<?> b) { return a == b || (isNaturalComparator(a) && isNaturalComparator(b)); } private static boolean isNaturalComparator(Comparator<?> a) { return a == null || a == Comparator.naturalOrder() || a == Ordering.natural(); } // iter must be in sorted order! private Builder<V> mergeAll(Iterable<V> add, int addCount) { assert auto; // ensure the existing contents are in order autoEnforce(); int curCount = count; // we make room for curCount * 2 + addCount, so that we can copy the current values to the end // if necessary for continuing the merge, and have the new values directly after the current value range if (values.length < curCount * 2 + addCount) values = Arrays.copyOf(values, max(curCount * 2 + addCount, curCount * 3)); if (add instanceof BTreeSet) { // use btree set's fast toArray method, to append directly ((BTreeSet) add).toArray(values, curCount); } else { // consider calling toArray() and System.arraycopy int i = curCount; for (V v : add) values[i++] = v; } return mergeAll(addCount); } private Builder<V> mergeAll(int addCount) { Object[] a = values; int addOffset = count; int i = 0, j = addOffset; int curEnd = addOffset, addEnd = addOffset + addCount; // save time in cases where we already have a subset, by skipping dir while (i < curEnd && j < addEnd) { V ai = (V) a[i], aj = (V) a[j]; // in some cases, such as Columns, we may have identity supersets, so perform a cheap object-identity check int c = ai == aj ? 0 : comparator.compare(ai, aj); if (c > 0) break; else if (c == 0) { if (quickResolver != null) a[i] = quickResolver.resolve(ai, aj); j++; } i++; } if (j == addEnd) return this; // already a superset of the new values // otherwise, copy the remaining existing values to the very end, freeing up space for merge result int newCount = i; System.arraycopy(a, i, a, addEnd, count - i); curEnd = addEnd + (count - i); i = addEnd; while (i < curEnd && j < addEnd) { V ai = (V) a[i]; V aj = (V) a[j]; // could avoid one comparison if we cared, but would make this ugly int c = comparator.compare(ai, aj); if (c == 0) { Object newValue = quickResolver == null ? ai : quickResolver.resolve(ai, aj); a[newCount++] = newValue; i++; j++; } else { a[newCount++] = c < 0 ? a[i++] : a[j++]; } } // exhausted one of the inputs; fill in remainder of the other if (i < curEnd) { System.arraycopy(a, i, a, newCount, curEnd - i); newCount += curEnd - i; } else if (j < addEnd) { if (j != newCount) System.arraycopy(a, j, a, newCount, addEnd - j); newCount += addEnd - j; } count = newCount; return this; } public boolean isEmpty() { return count == 0; } public Builder<V> reverse() { assert !auto; int mid = count / 2; for (int i = 0 ; i < mid ; i++) { Object t = values[i]; values[i] = values[count - (1 + i)]; values[count - (1 + i)] = t; } return this; } public Builder<V> sort() { Arrays.sort((V[]) values, 0, count, comparator); return this; } // automatically enforce sorted+filtered private void autoEnforce() { if (!detected && count > 1) { sort(); int prevIdx = 0; V prev = (V) values[0]; for (int i = 1 ; i < count ; i++) { V next = (V) values[i]; if (comparator.compare(prev, next) != 0) values[++prevIdx] = prev = next; else if (quickResolver != null) values[prevIdx] = prev = quickResolver.resolve(prev, next); } count = prevIdx + 1; } detected = true; } public Builder<V> resolve(Resolver resolver) { if (count > 0) { int c = 0; int prev = 0; for (int i = 1 ; i < count ; i++) { if (comparator.compare((V) values[i], (V) values[prev]) != 0) { values[c++] = resolver.resolve((V[]) values, prev, i); prev = i; } } values[c++] = resolver.resolve((V[]) values, prev, count); count = c; } return this; } public Object[] build() { if (auto) autoEnforce(); return BTree.build(Arrays.asList(values).subList(0, count), UpdateFunction.noOp()); } }
simple static wrapper to calls to cmp.compare() which checks if either a or b are Special (i.e. represent an infinity)
/** simple static wrapper to calls to cmp.compare() which checks if either a or b are Special (i.e. represent an infinity) */
static <V> int compare(Comparator<V> cmp, Object a, Object b) { if (a == b) return 0; if (a == NEGATIVE_INFINITY | b == POSITIVE_INFINITY) return -1; if (b == NEGATIVE_INFINITY | a == POSITIVE_INFINITY) return 1; return cmp.compare((V) a, (V) b); } static Object POSITIVE_INFINITY = new Object(); static Object NEGATIVE_INFINITY = new Object(); public static boolean isWellFormed(Object[] btree, Comparator<? extends Object> cmp) { return isWellFormed(cmp, btree, true, NEGATIVE_INFINITY, POSITIVE_INFINITY); } private static boolean isWellFormed(Comparator<?> cmp, Object[] node, boolean isRoot, Object min, Object max) { if (cmp != null && !isNodeWellFormed(cmp, node, min, max)) return false; if (isLeaf(node)) { if (isRoot) return node.length <= FAN_FACTOR + 1; return node.length >= FAN_FACTOR / 2 && node.length <= FAN_FACTOR + 1; } final int keyCount = getBranchKeyEnd(node); if ((!isRoot && keyCount < FAN_FACTOR / 2) || keyCount > FAN_FACTOR + 1) return false; int type = 0; int size = -1; int[] sizeMap = getSizeMap(node); // compare each child node with the branch element at the head of this node it corresponds with for (int i = getChildStart(node); i < getChildEnd(node) ; i++) { Object[] child = (Object[]) node[i]; size += size(child) + 1; if (sizeMap[i - getChildStart(node)] != size) return false; Object localmax = i < node.length - 2 ? node[i - getChildStart(node)] : max; if (!isWellFormed(cmp, child, false, min, localmax)) return false; type |= isLeaf(child) ? 1 : 2; min = localmax; } return type < 3; // either all leaves or all branches but not a mix } private static boolean isNodeWellFormed(Comparator<?> cmp, Object[] node, Object min, Object max) { Object previous = min; int end = getKeyEnd(node); for (int i = 0; i < end; i++) { Object current = node[i]; if (compare(cmp, previous, current) >= 0) return false; previous = current; } return compare(cmp, previous, max) < 0; }
Simple method to walk the btree forwards or reversed and apply a function to each element Public method
/** * Simple method to walk the btree forwards or reversed and apply a function to each element * * Public method * */
public static <V> void apply(Object[] btree, Consumer<V> function, boolean reversed) { if (reversed) applyReverse(btree, function, null); else applyForwards(btree, function, null); }
Simple method to walk the btree forwards or reversed and apply a function till a stop condition is reached Public method
/** * Simple method to walk the btree forwards or reversed and apply a function till a stop condition is reached * * Public method * */
public static <V> void apply(Object[] btree, Consumer<V> function, Predicate<V> stopCondition, boolean reversed) { if (reversed) applyReverse(btree, function, stopCondition); else applyForwards(btree, function, stopCondition); }
Simple method to walk the btree forwards and apply a function till a stop condition is reached Private method
Params:
  • btree –
  • function –
  • stopCondition –
/** * Simple method to walk the btree forwards and apply a function till a stop condition is reached * * Private method * * @param btree * @param function * @param stopCondition */
private static <V> boolean applyForwards(Object[] btree, Consumer<V> function, Predicate<V> stopCondition) { boolean isLeaf = isLeaf(btree); int childOffset = isLeaf ? Integer.MAX_VALUE : getChildStart(btree); int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1; for (int i = 0 ; i < limit ; i++) { // we want to visit in iteration order, so we visit our key nodes inbetween our children int idx = isLeaf ? i : (i / 2) + (i % 2 == 0 ? childOffset : 0); Object current = btree[idx]; if (idx < childOffset) { V castedCurrent = (V) current; if (stopCondition != null && stopCondition.apply(castedCurrent)) return true; function.accept(castedCurrent); } else { if (applyForwards((Object[]) current, function, stopCondition)) return true; } } return false; }
Simple method to walk the btree in reverse and apply a function till a stop condition is reached Private method
Params:
  • btree –
  • function –
  • stopCondition –
/** * Simple method to walk the btree in reverse and apply a function till a stop condition is reached * * Private method * * @param btree * @param function * @param stopCondition */
private static <V> boolean applyReverse(Object[] btree, Consumer<V> function, Predicate<V> stopCondition) { boolean isLeaf = isLeaf(btree); int childOffset = isLeaf ? 0 : getChildStart(btree); int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1; for (int i = limit - 1, visited = 0; i >= 0 ; i--, visited++) { int idx = i; // we want to visit in reverse iteration order, so we visit our children nodes inbetween our keys if (!isLeaf) { int typeOffset = visited / 2; if (i % 2 == 0) { // This is a child branch. Since children are in the second half of the array, we must // adjust for the key's we've visited along the way idx += typeOffset; } else { // This is a key. Since the keys are in the first half of the array and we are iterating // in reverse we subtract the childOffset and adjust for children we've walked so far idx = i - childOffset + typeOffset; } } Object current = btree[idx]; if (isLeaf || idx < childOffset) { V castedCurrent = (V) current; if (stopCondition != null && stopCondition.apply(castedCurrent)) return true; function.accept(castedCurrent); } else { if (applyReverse((Object[]) current, function, stopCondition)) return true; } } return false; } }