/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.utils;
import java.io.DataInput;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.PeekingIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.db.TypeSizes;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.IPartitionerDependentSerializer;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.io.IVersionedSerializer;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.net.MessagingService;
/**
* A MerkleTree implemented as a binary tree.
*
* A MerkleTree is a full binary tree that represents a perfect binary tree of
* depth 'hashdepth'. In a perfect binary tree, each leaf contains a
* sequentially hashed range, and each inner node contains the binary hash of
* its two children. In the MerkleTree, many ranges will not be split to the
* full depth of the perfect binary tree: the leaves of this tree are Leaf objects,
* which contain the computed values of the nodes that would be below them if
* the tree were perfect.
*
* The hash values of the inner nodes of the MerkleTree are calculated lazily based
* on their children when the hash of a range is requested with hash(range).
*
* Inputs passed to TreeRange.addHash should be calculated using a very secure hash,
* because all hashing internal to the tree is accomplished using XOR.
*
* If two MerkleTrees have the same hashdepth, they represent a perfect tree
* of the same depth, and can always be compared, regardless of size or splits.
*/
public class MerkleTree implements Serializable
{
private static final Logger logger = LoggerFactory.getLogger(MerkleTree.class);
public static final MerkleTreeSerializer serializer = new MerkleTreeSerializer();
private static final long serialVersionUID = 2L;
public static final byte RECOMMENDED_DEPTH = Byte.MAX_VALUE - 1;
public static final int CONSISTENT = 0;
public static final int FULLY_INCONSISTENT = 1;
public static final int PARTIALLY_INCONSISTENT = 2;
private static final byte[] EMPTY_HASH = new byte[0];
public final byte hashdepth;
/** The top level range that this MerkleTree covers. */
public final Range<Token> fullRange;
private final IPartitioner partitioner;
private long maxsize;
private long size;
private Hashable root;
public static class MerkleTreeSerializer implements IVersionedSerializer<MerkleTree>
{
public void serialize(MerkleTree mt, DataOutputPlus out, int version) throws IOException
{
out.writeByte(mt.hashdepth);
out.writeLong(mt.maxsize);
out.writeLong(mt.size);
out.writeUTF(mt.partitioner.getClass().getCanonicalName());
// full range
Token.serializer.serialize(mt.fullRange.left, out, version);
Token.serializer.serialize(mt.fullRange.right, out, version);
Hashable.serializer.serialize(mt.root, out, version);
}
public MerkleTree deserialize(DataInputPlus in, int version) throws IOException
{
byte hashdepth = in.readByte();
long maxsize = in.readLong();
long size = in.readLong();
IPartitioner partitioner;
try
{
partitioner = FBUtilities.newPartitioner(in.readUTF());
}
catch (ConfigurationException e)
{
throw new IOException(e);
}
// full range
Token left = Token.serializer.deserialize(in, partitioner, version);
Token right = Token.serializer.deserialize(in, partitioner, version);
Range<Token> fullRange = new Range<>(left, right);
MerkleTree mt = new MerkleTree(partitioner, fullRange, hashdepth, maxsize);
mt.size = size;
mt.root = Hashable.serializer.deserialize(in, partitioner, version);
return mt;
}
public long serializedSize(MerkleTree mt, int version)
{
long size = 1 // mt.hashdepth
+ TypeSizes.sizeof(mt.maxsize)
+ TypeSizes.sizeof(mt.size)
+ TypeSizes.sizeof(mt.partitioner.getClass().getCanonicalName());
// full range
size += Token.serializer.serializedSize(mt.fullRange.left, version);
size += Token.serializer.serializedSize(mt.fullRange.right, version);
size += Hashable.serializer.serializedSize(mt.root, version);
return size;
}
}
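/*
* Illustrative round-trip sketch (not part of the original file). It assumes the in-memory
* stream helpers DataOutputBuffer and DataInputBuffer from org.apache.cassandra.io.util are
* available, and 'tree' stands for an already-built MerkleTree.
*
*   DataOutputBuffer out = new DataOutputBuffer();
*   MerkleTree.serializer.serialize(tree, out, MessagingService.current_version);
*   MerkleTree copy = MerkleTree.serializer.deserialize(new DataInputBuffer(out.buffer(), false),
*                                                       MessagingService.current_version);
*/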
/**
* @param partitioner The partitioner in use.
* @param range the range this tree covers
* @param hashdepth The maximum depth of the tree. 100/(2^depth) is the %
* of the key space covered by each subrange of a fully populated tree.
* @param maxsize The maximum number of subranges in the tree.
*/
public MerkleTree(IPartitioner partitioner, Range<Token> range, byte hashdepth, long maxsize)
{
assert hashdepth < Byte.MAX_VALUE;
this.fullRange = Preconditions.checkNotNull(range);
this.partitioner = Preconditions.checkNotNull(partitioner);
this.hashdepth = hashdepth;
this.maxsize = maxsize;
size = 1;
root = new Leaf(null);
}
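/*
* Illustrative sketch (not part of the original file): building a tree over a partitioner's
* full token range. The partitioner reference and the maxsize of 128 are assumptions chosen
* for the example.
*
*   IPartitioner p = ...; // the table's partitioner
*   Range<Token> full = new Range<>(p.getMinimumToken(), p.getMinimumToken());
*   MerkleTree tree = new MerkleTree(p, full, RECOMMENDED_DEPTH, 128);
*   tree.init(); // pre-splits into min(hashdepth, log2(maxsize)) levels of leaves
*/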
static byte inc(byte in)
{
assert in < Byte.MAX_VALUE;
return (byte)(in + 1);
}
/**
* Initializes this tree by splitting it until hashdepth is reached,
* or until an additional level of splits would violate maxsize.
*
* NB: Replaces all nodes in the tree.
*/
public void init()
{
// determine the depth to which we can safely split the tree
byte sizedepth = (byte)(Math.log10(maxsize) / Math.log10(2));
byte depth = (byte)Math.min(sizedepth, hashdepth);
root = initHelper(fullRange.left, fullRange.right, (byte)0, depth);
size = (long)Math.pow(2, depth);
}
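// Worked example of the calculation above (illustrative): with maxsize = 1000,
// sizedepth = floor(log10(1000) / log10(2)) = floor(9.97) = 9, so (for hashdepth >= 9) the tree
// is pre-split into 2^9 = 512 leaf ranges; one more level would need 1024 > maxsize subranges.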
private Hashable initHelper(Token left, Token right, byte depth, byte max)
{
if (depth == max)
// we've reached the leaves
return new Leaf();
Token midpoint = partitioner.midpoint(left, right);
if (midpoint.equals(left) || midpoint.equals(right))
return new Leaf();
Hashable lchild = initHelper(left, midpoint, inc(depth), max);
Hashable rchild = initHelper(midpoint, right, inc(depth), max);
return new Inner(midpoint, lchild, rchild);
}
Hashable root()
{
return root;
}
public IPartitioner partitioner()
{
return partitioner;
}
/**
* The number of distinct ranges contained in this tree. This is a reasonable
* measure of the memory usage of the tree, since each range corresponds to a
* node held in memory.
*/
public long size()
{
return size;
}
public long maxsize()
{
return maxsize;
}
public void maxsize(long maxsize)
{
this.maxsize = maxsize;
}
/**
* @param ltree First tree.
* @param rtree Second tree.
* @return A list of the largest contiguous ranges where the given trees disagree.
*/
public static List<TreeRange> difference(MerkleTree ltree, MerkleTree rtree)
{
if (!ltree.fullRange.equals(rtree.fullRange))
throw new IllegalArgumentException("Difference only make sense on tree covering the same range (but " + ltree.fullRange + " != " + rtree.fullRange + ")");
List<TreeRange> diff = new ArrayList<>();
TreeDifference active = new TreeDifference(ltree.fullRange.left, ltree.fullRange.right, (byte)0);
Hashable lnode = ltree.find(active);
Hashable rnode = rtree.find(active);
byte[] lhash = lnode.hash();
byte[] rhash = rnode.hash();
active.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());
if (lhash != null && rhash != null && !Arrays.equals(lhash, rhash))
{
if(lnode instanceof Leaf || rnode instanceof Leaf)
{
logger.debug("Digest mismatch detected among leaf nodes {}, {}", lnode, rnode);
diff.add(active);
}
else
{
logger.debug("Digest mismatch detected, traversing trees [{}, {}]", ltree, rtree);
if (FULLY_INCONSISTENT == differenceHelper(ltree, rtree, diff, active))
{
logger.debug("Range {} fully inconsistent", active);
diff.add(active);
}
}
}
else if (lhash == null || rhash == null)
diff.add(active);
return diff;
}
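/*
* Illustrative sketch (not from the original file): comparing two fully hashed trees that
* cover the same range. 'ltree' and 'rtree' stand for trees whose leaves were already
* populated, e.g. during validation.
*
*   List<TreeRange> outOfSync = MerkleTree.difference(ltree, rtree);
*   for (TreeRange r : outOfSync)
*       logger.debug("Mismatching range: {}", r);
*/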
/**
* TODO: This function could be optimized into a depth first traversal of
* the two trees in parallel.
*
* Takes two trees and a range for which they have hashes, but are inconsistent.
* @return FULLY_INCONSISTENT if active is inconsistent, PARTIALLY_INCONSISTENT if only a subrange is inconsistent.
*/
@VisibleForTesting
static int differenceHelper(MerkleTree ltree, MerkleTree rtree, List<TreeRange> diff, TreeRange active)
{
if (active.depth == Byte.MAX_VALUE)
return CONSISTENT;
Token midpoint = ltree.partitioner().midpoint(active.left, active.right);
// sanity check for midpoint calculation, see CASSANDRA-13052
if (midpoint.equals(active.left) || midpoint.equals(active.right))
{
// If the midpoint equals either the left or the right, we have a range that's too small to split - we'll simply report the
// whole range as inconsistent
logger.debug("({}) No sane midpoint ({}) for range {} , marking whole range as inconsistent", active.depth, midpoint, active);
return FULLY_INCONSISTENT;
}
TreeDifference left = new TreeDifference(active.left, midpoint, inc(active.depth));
TreeDifference right = new TreeDifference(midpoint, active.right, inc(active.depth));
logger.debug("({}) Hashing sub-ranges [{}, {}] for {} divided by midpoint {}", active.depth, left, right, active, midpoint);
byte[] lhash, rhash;
Hashable lnode, rnode;
// see if we should recurse left
lnode = ltree.find(left);
rnode = rtree.find(left);
lhash = lnode.hash();
rhash = rnode.hash();
left.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());
left.setRows(lnode.rowsInRange(), rnode.rowsInRange());
int ldiff = CONSISTENT;
boolean lreso = lhash != null && rhash != null;
if (lreso && !Arrays.equals(lhash, rhash))
{
logger.debug("({}) Inconsistent digest on left sub-range {}: [{}, {}]", active.depth, left, lnode, rnode);
if (lnode instanceof Leaf) ldiff = FULLY_INCONSISTENT;
else ldiff = differenceHelper(ltree, rtree, diff, left);
}
else if (!lreso)
{
logger.debug("({}) Left sub-range fully inconsistent {}", active.depth, right);
ldiff = FULLY_INCONSISTENT;
}
// see if we should recurse right
lnode = ltree.find(right);
rnode = rtree.find(right);
lhash = lnode.hash();
rhash = rnode.hash();
right.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());
right.setRows(lnode.rowsInRange(), rnode.rowsInRange());
int rdiff = CONSISTENT;
boolean rreso = lhash != null && rhash != null;
if (rreso && !Arrays.equals(lhash, rhash))
{
logger.debug("({}) Inconsistent digest on right sub-range {}: [{}, {}]", active.depth, right, lnode, rnode);
if (rnode instanceof Leaf) rdiff = FULLY_INCONSISTENT;
else rdiff = differenceHelper(ltree, rtree, diff, right);
}
else if (!rreso)
{
logger.debug("({}) Right sub-range fully inconsistent {}", active.depth, right);
rdiff = FULLY_INCONSISTENT;
}
if (ldiff == FULLY_INCONSISTENT && rdiff == FULLY_INCONSISTENT)
{
// both children are fully inconsistent
logger.debug("({}) Fully inconsistent range [{}, {}]", active.depth, left, right);
return FULLY_INCONSISTENT;
}
else if (ldiff == FULLY_INCONSISTENT)
{
logger.debug("({}) Adding left sub-range to diff as fully inconsistent {}", active.depth, left);
diff.add(left);
return PARTIALLY_INCONSISTENT;
}
else if (rdiff == FULLY_INCONSISTENT)
{
logger.debug("({}) Adding right sub-range to diff as fully inconsistent {}", active.depth, right);
diff.add(right);
return PARTIALLY_INCONSISTENT;
}
logger.debug("({}) Range {} partially inconstent", active.depth, active);
return PARTIALLY_INCONSISTENT;
}
/**
* For testing purposes.
* Gets the smallest range containing the token.
*/
public TreeRange get(Token t)
{
return getHelper(root, fullRange.left, fullRange.right, (byte)0, t);
}
TreeRange getHelper(Hashable hashable, Token pleft, Token pright, byte depth, Token t)
{
while (true)
{
if (hashable instanceof Leaf)
{
// we've reached a hash: wrap it up and deliver it
return new TreeRange(this, pleft, pright, depth, hashable);
}
// else: node.
Inner node = (Inner) hashable;
depth = inc(depth);
if (Range.contains(pleft, node.token, t))
{ // left child contains token
hashable = node.lchild;
pright = node.token;
}
else
{ // else: right child contains token
hashable = node.rchild;
pleft = node.token;
}
}
}
/**
* Invalidates the ranges containing the given token.
* Useful for testing.
*/
public void invalidate(Token t)
{
invalidateHelper(root, fullRange.left, t);
}
private void invalidateHelper(Hashable hashable, Token pleft, Token t)
{
hashable.hash(null);
if (hashable instanceof Leaf)
return;
// else: node.
Inner node = (Inner)hashable;
if (Range.contains(pleft, node.token, t))
// left child contains token
invalidateHelper(node.lchild, pleft, t);
else
// right child contains token
invalidateHelper(node.rchild, node.token, t);
}
/**
* Hash the given range in the tree. The range must have been generated
* with recursive applications of partitioner.midpoint().
*
* NB: Currently does not support wrapping ranges that do not end with
* partitioner.getMinimumToken().
*
* @return Null if any subrange of the range is invalid, or if the exact
* range cannot be calculated using this tree.
*/
public byte[] hash(Range<Token> range)
{
return find(range).hash();
}
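/*
* Illustrative sketch: asking for the hash of the tree's full range. Because the full range
* trivially lines up with midpoint() boundaries it is always a legal argument; the call
* returns null while any covered leaf is still uninitialised.
*
*   byte[] top = tree.hash(new Range<>(tree.fullRange.left, tree.fullRange.right));
*/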
/**
* Find the {@link Hashable} node that matches the given {@code range}.
*
* @param range Range to find
* @return {@link Hashable} found. If nothing found, return {@link Leaf} with null hash.
*/
private Hashable find(Range<Token> range)
{
try
{
return findHelper(root, new Range<Token>(fullRange.left, fullRange.right), range);
}
catch (StopRecursion e)
{
return new Leaf();
}
}
/**
* @throws StopRecursion If no match could be found for the range.
*/
private Hashable findHelper(Hashable current, Range<Token> activeRange, Range<Token> find) throws StopRecursion
{
while (true)
{
if (current instanceof Leaf)
{
if (!find.contains(activeRange))
// we are not fully contained in this range!
throw new StopRecursion.BadRange();
return current;
}
// else: node.
Inner node = (Inner) current;
Range<Token> leftRange = new Range<>(activeRange.left, node.token);
Range<Token> rightRange = new Range<>(node.token, activeRange.right);
if (find.contains(activeRange))
// this node is fully contained in the range
return node.calc();
// else: one of our children contains the range
if (leftRange.contains(find))
{ // left child contains/matches the range
current = node.lchild;
activeRange = leftRange;
}
else if (rightRange.contains(find))
{ // right child contains/matches the range
current = node.rchild;
activeRange = rightRange;
}
else
{
throw new StopRecursion.BadRange();
}
}
}
/**
* Splits the range containing the given token, if no tree limits would be
* violated. If the range would be split to a depth below hashdepth, or if
* the tree already contains maxsize subranges, this operation will fail.
*
* @return True if the range was successfully split.
*/
public boolean split(Token t)
{
if (!(size < maxsize))
return false;
try
{
root = splitHelper(root, fullRange.left, fullRange.right, (byte)0, t);
}
catch (StopRecursion.TooDeep e)
{
return false;
}
return true;
}
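/*
* Illustrative sketch: callers typically keep splitting on row tokens until the tree refuses,
* which happens once maxsize leaves exist or the enclosing range is already at hashdepth
* (or too narrow to bisect). 'rowToken' is a hypothetical token.
*
*   if (!tree.split(rowToken))
*       ...; // stop splitting; subsequent rows are folded into the existing leaves
*/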
private Hashable splitHelper(Hashable hashable, Token pleft, Token pright, byte depth, Token t) throws StopRecursion.TooDeep
{
if (depth >= hashdepth)
throw new StopRecursion.TooDeep();
if (hashable instanceof Leaf)
{
Token midpoint = partitioner.midpoint(pleft, pright);
// We should not create a nonsensical range where start and end are the same token (nonsensical because
// ranges are start-exclusive). Note that we shouldn't hit this unless the full range is very small or we are fairly deep
if (midpoint.equals(pleft) || midpoint.equals(pright))
throw new StopRecursion.TooDeep();
// split
size++;
return new Inner(midpoint, new Leaf(), new Leaf());
}
// else: node.
// recurse on the matching child
Inner node = (Inner)hashable;
if (Range.contains(pleft, node.token, t))
// left child contains token
node.lchild(splitHelper(node.lchild, pleft, node.token, inc(depth), t));
else
// else: right child contains token
node.rchild(splitHelper(node.rchild, node.token, pright, inc(depth), t));
return node;
}
/**
* Returns a lazy iterator of invalid TreeRanges that need to be filled
* in order to make the given Range valid.
*/
public TreeRangeIterator invalids()
{
return new TreeRangeIterator(this);
}
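/*
* Illustrative sketch (not from the original file): the usual way to fill the tree is to walk
* the invalid leaf ranges in order and mix row hashes into each one. 'rowHashesFor(range)' is
* a hypothetical iterator of RowHash entries in token order.
*
*   for (TreeRange range : tree.invalids())
*   {
*       range.addAll(rowHashesFor(range));
*       range.ensureHashInitialised(); // ranges with no rows still get a non-null (empty) hash
*   }
*/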
public EstimatedHistogram histogramOfRowSizePerLeaf()
{
HistogramBuilder histbuild = new HistogramBuilder();
for (TreeRange range : new TreeRangeIterator(this))
{
histbuild.add(range.hashable.sizeOfRange);
}
return histbuild.buildWithStdevRangesAroundMean();
}
public EstimatedHistogram histogramOfRowCountPerLeaf()
{
HistogramBuilder histbuild = new HistogramBuilder();
for (TreeRange range : new TreeRangeIterator(this))
{
histbuild.add(range.hashable.rowsInRange);
}
return histbuild.buildWithStdevRangesAroundMean();
}
public long rowCount()
{
long count = 0;
for (TreeRange range : new TreeRangeIterator(this))
{
count += range.hashable.rowsInRange;
}
return count;
}
@Override
public String toString()
{
StringBuilder buff = new StringBuilder();
buff.append("#<MerkleTree root=");
root.toString(buff, 8);
buff.append(">");
return buff.toString();
}
public static class TreeDifference extends TreeRange
{
private static final long serialVersionUID = 6363654174549968183L;
private long sizeOnLeft;
private long sizeOnRight;
private long rowsOnLeft;
private long rowsOnRight;
void setSize(long sizeOnLeft, long sizeOnRight)
{
this.sizeOnLeft = sizeOnLeft;
this.sizeOnRight = sizeOnRight;
}
void setRows(long rowsOnLeft, long rowsOnRight)
{
this.rowsOnLeft = rowsOnLeft;
this.rowsOnRight = rowsOnRight;
}
public long sizeOnLeft()
{
return sizeOnLeft;
}
public long sizeOnRight()
{
return sizeOnRight;
}
public long rowsOnLeft()
{
return rowsOnLeft;
}
public long rowsOnRight()
{
return rowsOnRight;
}
public TreeDifference(Token left, Token right, byte depth)
{
super(null, left, right, depth, null);
}
public long totalRows()
{
return rowsOnLeft + rowsOnRight;
}
}
/**
* The public interface to a range in the tree.
*
* NB: A TreeRange should not be returned by a public method unless the
* parents of the range it represents are already invalidated, since it
* will allow someone to modify the hash. Alternatively, a TreeRange
* may be created with a null tree, indicating that it is read only.
*/
public static class TreeRange extends Range<Token>
{
public static final long serialVersionUID = 1L;
private final MerkleTree tree;
public final byte depth;
private final Hashable hashable;
TreeRange(MerkleTree tree, Token left, Token right, byte depth, Hashable hashable)
{
super(left, right);
this.tree = tree;
this.depth = depth;
this.hashable = hashable;
}
public void hash(byte[] hash)
{
assert tree != null : "Not intended for modification!";
hashable.hash(hash);
}
public byte[] hash()
{
return hashable.hash();
}
/**
* @param entry Row to mix into the hash for this range.
*/
public void addHash(RowHash entry)
{
assert tree != null : "Not intended for modification!";
assert hashable instanceof Leaf;
hashable.addHash(entry.hash, entry.size);
}
public void ensureHashInitialised()
{
assert tree != null : "Not intended for modification!";
assert hashable instanceof Leaf;
if (hashable.hash == null)
hashable.hash = EMPTY_HASH;
}
public void addAll(Iterator<RowHash> entries)
{
while (entries.hasNext())
addHash(entries.next());
}
@Override
public String toString()
{
StringBuilder buff = new StringBuilder("#<TreeRange ");
buff.append(super.toString()).append(" depth=").append(depth);
return buff.append(">").toString();
}
}
/**
* Returns the leaves (ranges) of a given tree in increasing order.
* If the full range covered by the tree doesn't wrap, the ranges are returned
* in increasing order.
* If the full range wraps, the first *and* last range returned by the
* iterator will be the wrapping range. It is the only case where the same
* leaf will be returned twice.
*/
public static class TreeRangeIterator extends AbstractIterator<TreeRange> implements Iterable<TreeRange>, PeekingIterator<TreeRange>
{
// stack of ranges to visit
private final ArrayDeque<TreeRange> tovisit;
// interesting range
private final MerkleTree tree;
TreeRangeIterator(MerkleTree tree)
{
tovisit = new ArrayDeque<TreeRange>();
tovisit.add(new TreeRange(tree, tree.fullRange.left, tree.fullRange.right, (byte)0, tree.root));
this.tree = tree;
}
/**
* Find the next TreeRange.
*
* @return The next TreeRange.
*/
public TreeRange computeNext()
{
while (!tovisit.isEmpty())
{
TreeRange active = tovisit.pop();
if (active.hashable instanceof Leaf)
{
// found a leaf invalid range
if (active.isWrapAround() && !tovisit.isEmpty())
// put to be taken again last
tovisit.addLast(active);
return active;
}
Inner node = (Inner)active.hashable;
TreeRange left = new TreeRange(tree, active.left, node.token, inc(active.depth), node.lchild);
TreeRange right = new TreeRange(tree, node.token, active.right, inc(active.depth), node.rchild);
if (right.isWrapAround())
{
// whatever is on the left is 'after' everything we have seen so far (it has greater tokens)
tovisit.addLast(left);
tovisit.addFirst(right);
}
else
{
// do left first then right
tovisit.addFirst(right);
tovisit.addFirst(left);
}
}
return endOfData();
}
public Iterator<TreeRange> iterator()
{
return this;
}
}
/**
* An inner node in the MerkleTree. Inners can contain cached hash values, which
* are the binary hash of their two children.
*/
static class Inner extends Hashable
{
public static final long serialVersionUID = 1L;
static final byte IDENT = 2;
public final Token token;
private Hashable lchild;
private Hashable rchild;
private static final InnerSerializer serializer = new InnerSerializer();
/**
* Constructs an Inner with the given token and children, and a null hash.
*/
public Inner(Token token, Hashable lchild, Hashable rchild)
{
super(null);
this.token = token;
this.lchild = lchild;
this.rchild = rchild;
}
public Hashable lchild()
{
return lchild;
}
public Hashable rchild()
{
return rchild;
}
public void lchild(Hashable child)
{
lchild = child;
}
public void rchild(Hashable child)
{
rchild = child;
}
Hashable calc()
{
if (hash == null)
{
// hash and size haven't been calculated; calc children then compute
Hashable lnode = lchild.calc();
Hashable rnode = rchild.calc();
// cache the computed value
hash(lnode.hash, rnode.hash);
sizeOfRange = lnode.sizeOfRange + rnode.sizeOfRange;
rowsInRange = lnode.rowsInRange + rnode.rowsInRange;
}
return this;
}
/**
* Recursive toString.
*/
public void toString(StringBuilder buff, int maxdepth)
{
buff.append("#<").append(getClass().getSimpleName());
buff.append(" ").append(token);
buff.append(" hash=").append(Hashable.toString(hash()));
buff.append(" children=[");
if (maxdepth < 1)
{
buff.append("#");
}
else
{
if (lchild == null)
buff.append("null");
else
lchild.toString(buff, maxdepth-1);
buff.append(" ");
if (rchild == null)
buff.append("null");
else
rchild.toString(buff, maxdepth-1);
}
buff.append("]>");
}
@Override
public String toString()
{
StringBuilder buff = new StringBuilder();
toString(buff, 1);
return buff.toString();
}
private static class InnerSerializer implements IPartitionerDependentSerializer<Inner>
{
public void serialize(Inner inner, DataOutputPlus out, int version) throws IOException
{
if (version < MessagingService.VERSION_30)
{
if (inner.hash == null)
out.writeInt(-1);
else
{
out.writeInt(inner.hash.length);
out.write(inner.hash);
}
}
Token.serializer.serialize(inner.token, out, version);
Hashable.serializer.serialize(inner.lchild, out, version);
Hashable.serializer.serialize(inner.rchild, out, version);
}
public Inner deserialize(DataInput in, IPartitioner p, int version) throws IOException
{
if (version < MessagingService.VERSION_30)
{
int hashLen = in.readInt();
byte[] hash = hashLen >= 0 ? new byte[hashLen] : null;
if (hash != null)
in.readFully(hash);
}
Token token = Token.serializer.deserialize(in, p, version);
Hashable lchild = Hashable.serializer.deserialize(in, p, version);
Hashable rchild = Hashable.serializer.deserialize(in, p, version);
return new Inner(token, lchild, rchild);
}
public long serializedSize(Inner inner, int version)
{
long size = 0;
if (version < MessagingService.VERSION_30)
{
size += inner.hash == null
? TypeSizes.sizeof(-1)
: TypeSizes.sizeof(inner.hash().length) + inner.hash().length;
}
size += Token.serializer.serializedSize(inner.token, version)
+ Hashable.serializer.serializedSize(inner.lchild, version)
+ Hashable.serializer.serializedSize(inner.rchild, version);
return size;
}
}
}
/**
* A leaf node in the MerkleTree. Because the MerkleTree represents a much
* larger perfect binary tree of depth hashdepth, a Leaf object contains
* the value that would be contained in the perfect tree at its position.
*
* When rows are added to the MerkleTree using TreeRange.addHash(), the
* tree extending below the Leaf is generated in memory, but only the root
* is stored in the Leaf.
*/
static class Leaf extends Hashable
{
public static final long serialVersionUID = 1L;
static final byte IDENT = 1;
private static final LeafSerializer serializer = new LeafSerializer();
/**
* Constructs a null hash.
*/
public Leaf()
{
super(null);
}
public Leaf(byte[] hash)
{
super(hash);
}
public void toString(StringBuilder buff, int maxdepth)
{
buff.append(toString());
}
@Override
public String toString()
{
return "#<Leaf " + Hashable.toString(hash()) + ">";
}
private static class LeafSerializer implements IPartitionerDependentSerializer<Leaf>
{
public void serialize(Leaf leaf, DataOutputPlus out, int version) throws IOException
{
if (leaf.hash == null)
{
if (version < MessagingService.VERSION_30)
out.writeInt(-1);
else
out.writeByte(-1);
}
else
{
if (version < MessagingService.VERSION_30)
out.writeInt(leaf.hash.length);
else
out.writeByte(leaf.hash.length);
out.write(leaf.hash);
}
}
public Leaf deserialize(DataInput in, IPartitioner p, int version) throws IOException
{
int hashLen = version < MessagingService.VERSION_30 ? in.readInt() : in.readByte();
byte[] hash = hashLen < 0 ? null : new byte[hashLen];
if (hash != null)
in.readFully(hash);
return new Leaf(hash);
}
public long serializedSize(Leaf leaf, int version)
{
long size = version < MessagingService.VERSION_30 ? TypeSizes.sizeof(1) : 1;
if (leaf.hash != null)
{
size += leaf.hash().length;
}
return size;
}
}
}
/**
* Hash value representing a row, to be used to pass hashes to the MerkleTree.
* The byte[] hash value should contain a digest of the key and value of the row
* created using a very strong hash function.
*/
public static class RowHash
{
public final Token token;
public final byte[] hash;
public final long size;
public RowHash(Token token, byte[] hash, long size)
{
this.token = token;
this.hash = hash;
this.size = size;
}
@Override
public String toString()
{
return "#<RowHash " + token + " " + Hashable.toString(hash) + " @ " + size + " bytes>";
}
}
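/*
* Illustrative sketch (assumption, not from the original file): building a RowHash with a
* strong digest, as recommended above. 'keyBytes' and 'rowBytes' are hypothetical inputs,
* 'partitioner' is the table's partitioner, and exception handling is elided.
*
*   MessageDigest md = MessageDigest.getInstance("SHA-256");
*   md.update(keyBytes);
*   md.update(rowBytes);
*   RowHash rh = new RowHash(partitioner.getToken(ByteBuffer.wrap(keyBytes)),
*                            md.digest(), rowBytes.length);
*/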
/**
* Abstract class containing hashing logic, and containing a single hash field.
*/
static abstract class Hashable implements Serializable
{
private static final long serialVersionUID = 1L;
private static final IPartitionerDependentSerializer<Hashable> serializer = new HashableSerializer();
protected byte[] hash;
protected long sizeOfRange;
protected long rowsInRange;
protected Hashable(byte[] hash)
{
this.hash = hash;
}
public byte[] hash()
{
return hash;
}
public long sizeOfRange()
{
return sizeOfRange;
}
public long rowsInRange()
{
return rowsInRange;
}
void hash(byte[] hash)
{
this.hash = hash;
}
Hashable calc()
{
return this;
}
/**
* Sets the value of this hash to binaryHash of its children.
* @param lefthash Hash of left child.
* @param righthash Hash of right child.
*/
void hash(byte[] lefthash, byte[] righthash)
{
hash = binaryHash(lefthash, righthash);
}
/**
* Mixes the given value into our hash. If our hash is null,
* our hash will become the given value.
*/
void addHash(byte[] righthash, long sizeOfRow)
{
if (hash == null)
hash = righthash;
else
hash = binaryHash(hash, righthash);
this.sizeOfRange += sizeOfRow;
this.rowsInRange += 1;
}
/**
* The primitive with which all hashing should be accomplished: hashes
* a left and right value together.
*/
static byte[] binaryHash(final byte[] left, final byte[] right)
{
return FBUtilities.xor(left, right);
}
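// Worked example (illustrative, assuming equal-length inputs): binaryHash(new byte[]{0x01, 0x0F},
// new byte[]{0x10, 0x0F}) XORs the arrays element-wise and yields {0x11, 0x00}; because XOR is
// commutative and associative, the order in which row hashes are mixed in does not change the result.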
public abstract void toString(StringBuilder buff, int maxdepth);
public static String toString(byte[] hash)
{
if (hash == null)
return "null";
return "[" + Hex.bytesToHex(hash) + "]";
}
private static class HashableSerializer implements IPartitionerDependentSerializer<Hashable>
{
public void serialize(Hashable h, DataOutputPlus out, int version) throws IOException
{
if (h instanceof Inner)
{
out.writeByte(Inner.IDENT);
Inner.serializer.serialize((Inner)h, out, version);
}
else if (h instanceof Leaf)
{
out.writeByte(Leaf.IDENT);
Leaf.serializer.serialize((Leaf) h, out, version);
}
else
throw new IOException("Unexpected Hashable: " + h.getClass().getCanonicalName());
}
public Hashable deserialize(DataInput in, IPartitioner p, int version) throws IOException
{
byte ident = in.readByte();
if (Inner.IDENT == ident)
return Inner.serializer.deserialize(in, p, version);
else if (Leaf.IDENT == ident)
return Leaf.serializer.deserialize(in, p, version);
else
throw new IOException("Unexpected Hashable: " + ident);
}
public long serializedSize(Hashable h, int version)
{
if (h instanceof Inner)
return 1 + Inner.serializer.serializedSize((Inner) h, version);
else if (h instanceof Leaf)
return 1 + Leaf.serializer.serializedSize((Leaf) h, version);
throw new AssertionError(h.getClass());
}
}
}
/**
* Exceptions that stop recursion early when we are sure that no answer
* can be found.
*/
static abstract class StopRecursion extends Exception
{
static class BadRange extends StopRecursion
{
public BadRange(){ super(); }
}
static class InvalidHash extends StopRecursion
{
public InvalidHash(){ super(); }
}
static class TooDeep extends StopRecursion
{
public TooDeep(){ super(); }
}
}
}