/* Woodstox Lite ("wool") XML processor
*
* Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.fasterxml.aalto.in;
import com.fasterxml.aalto.util.NameTable;
This is a symbol table implementation used for storing byte-based
PNames
, specifically, instances of (PNameC
). /**
* This is a symbol table implementation used for storing byte-based
* <code>PNames</code>, specifically, instances of
* ({@link PNameC}).
*/
public class CharBasedPNameTable
extends NameTable
{
final static int MIN_HASH_SIZE = 16;
protected static final float DEFAULT_FILL_FACTOR = 0.75f;
/*
/**********************************************************************
/* Actual symbol table data
/**********************************************************************
*/
Primary matching symbols; it's expected most match occur from
here.
/**
* Primary matching symbols; it's expected most match occur from
* here.
*/
protected PNameC[] _symbols;
Overflow buckets; if primary doesn't match, lookup is done
from here.
Note: Number of buckets is half of number of symbol entries, on
assumption there's less need for buckets.
/**
* Overflow buckets; if primary doesn't match, lookup is done
* from here.
*<p>
* Note: Number of buckets is half of number of symbol entries, on
* assumption there's less need for buckets.
*/
protected Bucket[] _buckets;
Current size (number of entries); needed to know if and when
rehash.
/**
* Current size (number of entries); needed to know if and when
* rehash.
*/
protected int _size;
Limit that indicates maximum size this instance can hold before
it needs to be expanded and rehashed. Calculated using fill
factor passed in to constructor.
/**
* Limit that indicates maximum size this instance can hold before
* it needs to be expanded and rehashed. Calculated using fill
* factor passed in to constructor.
*/
protected int _sizeThreshold;
Mask used to get index from hash values; equal to
mBuckets.length - 1
, when mBuckets.length is
a power of two.
/**
* Mask used to get index from hash values; equal to
* <code>mBuckets.length - 1</code>, when mBuckets.length is
* a power of two.
*/
protected int _indexMask;
/*
/**********************************************************************
/* Information about concurrency
/**********************************************************************
*/
Flag that indicates if any changes have been made to the data;
used to both determine if bucket array needs to be copied when
(first) change is made, and potentially if updated bucket list
is to be resync'ed back to master instance.
/**
* Flag that indicates if any changes have been made to the data;
* used to both determine if bucket array needs to be copied when
* (first) change is made, and potentially if updated bucket list
* is to be resync'ed back to master instance.
*/
protected boolean _dirty;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
Main method for constructing a master symbol table instance; will
be called by other public constructors.
Params: - initialSize – Minimum initial size for bucket array; internally
will always use a power of two equal to or bigger than this value.
/**
* Main method for constructing a master symbol table instance; will
* be called by other public constructors.
*
* @param initialSize Minimum initial size for bucket array; internally
* will always use a power of two equal to or bigger than this value.
*/
public CharBasedPNameTable(int initialSize)
{
// Let's set flags so no copying of buckets is needed:
_dirty = true;
// No point in requesting funny initial sizes...
if (initialSize < 1) {
throw new IllegalArgumentException("Can not use negative/zero initial size: "+initialSize);
}
{
int currSize = MIN_HASH_SIZE;
while (currSize < initialSize) {
currSize += currSize;
}
initialSize = currSize;
}
_symbols = new PNameC[initialSize];
_buckets = new Bucket[initialSize >> 1];
// Mask is easy to calc for powers of two.
_indexMask = initialSize - 1;
_size = 0;
/* Let's use 3/4 fill factor...
*/
_sizeThreshold = (initialSize * 3 + 3) >> 2;
}
CharBasedPNameTable(CharBasedPNameTable parent)
{
_symbols = parent._symbols;
_buckets = parent._buckets;
_size = parent._size;
_sizeThreshold = parent._sizeThreshold;
_indexMask = parent._indexMask;
// Need to make copies of arrays, if/when adding new entries
_dirty = false;
}
Method that allows contents of child table to potentially be
"merged in" with contents of this symbol table.
Note that caller has to make sure symbol table passed in is
really a child or sibling of this symbol table.
/**
* Method that allows contents of child table to potentially be
* "merged in" with contents of this symbol table.
*<p>
* Note that caller has to make sure symbol table passed in is
* really a child or sibling of this symbol table.
*/
public synchronized void mergeFromChild(CharBasedPNameTable child)
{
// Let's do a basic sanity check first:
if (child.size() <= size()) { // nothing to add
return;
}
// Okie dokie, let's get the data in!
_symbols = child._symbols;
_buckets = child._buckets;
_size = child._size;
_sizeThreshold = child._sizeThreshold;
_indexMask = child._indexMask;
// Dirty flag... well, let's just clear it, to force copying just
// in case. Shouldn't really matter, for master tables.
_dirty = false;
/* However, we have to mark child as dirty, so that it will not
* be modifying arrays we "took over" (since child may have
* returned an updated table before it stopped fully using
* the SymbolTable: for example, it may still use it for
* parsing PI targets in epilog)
*/
child._dirty = false;
}
/*
/**********************************************************************
// Public API, generic accessors:
/**********************************************************************
*/
@Override
public int size() { return _size; }
@Override
public boolean maybeDirty() { return _dirty; }
/*
/**********************************************************************
/* Public API, accessing symbols
/**********************************************************************
*/
public PNameC findSymbol(char[] buffer, int start, int len, int hash)
{
int index = hash & _indexMask;
PNameC sym = _symbols[index];
// Optimal case; checking existing primary symbol for hash index:
if (sym != null) {
if (sym.equalsPName(buffer, start, len, hash)) {
return sym;
}
// How about collision bucket?
Bucket b = _buckets[index >> 1];
if (b != null) {
sym = b.find(buffer, start, len, hash);
if (sym != null) {
return sym;
}
}
}
return null;
}
public PNameC addSymbol(char[] buffer, int start, int len, int hash)
{
String newStr = new String(buffer, start, len).intern();
PNameC pname = PNameC.construct(newStr, hash);
int index = hash & _indexMask;
boolean primary;
// First... let's see if we can add it without a collision.
if (null == _symbols[index]) {
primary = true; // yup, we are good then
} else {
/* Otherwise, better check if we need to expand; after
* which there may be a slot in primary hash?
*/
if (_size >= _sizeThreshold) {
rehash();
// Need to recalc hash index
index = hash & _indexMask;
primary = (null == _symbols[index]);
} else { // nope: need a bucket
primary = false;
}
}
// good, can just update primary hash table
if (!_dirty) { // need to do copy-on-write?
copyArrays();
}
++_size;
if (primary) {
_symbols[index] = pname;
} else {
// Ok, all right: need to add to a bucket
int bix = (index >> 1);
_buckets[bix] = new Bucket(pname, _buckets[bix]);
}
return pname;
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
Method called when copy-on-write is needed; generally when first
change is made to a derived symbol table.
/**
* Method called when copy-on-write is needed; generally when first
* change is made to a derived symbol table.
*/
private void copyArrays()
{
PNameC[] oldSyms = _symbols;
int size = oldSyms.length;
_symbols = new PNameC[size];
System.arraycopy(oldSyms, 0, _symbols, 0, size);
Bucket[] oldBuckets = _buckets;
size = oldBuckets.length;
_buckets = new Bucket[size];
System.arraycopy(oldBuckets, 0, _buckets, 0, size);
_dirty = true;
}
Method called when size (number of entries) of symbol table grows
so big that load factor is exceeded. Since size has to remain
power of two, arrays will then always be doubled. Main work
is really redistributing old entries into new String/Bucket
entries.
/**
* Method called when size (number of entries) of symbol table grows
* so big that load factor is exceeded. Since size has to remain
* power of two, arrays will then always be doubled. Main work
* is really redistributing old entries into new String/Bucket
* entries.
*/
private void rehash()
{
int size = _symbols.length;
int newSize = size + size;
PNameC[] oldSyms = _symbols;
Bucket[] oldBuckets = _buckets;
_symbols = new PNameC[newSize];
_buckets = new Bucket[newSize >> 1];
// Let's update index mask, threshold, now (needed for rehashing)
_indexMask = newSize - 1;
_sizeThreshold += _sizeThreshold;
int count = 0; // let's do sanity check
/* Need to do two loops, unfortunately, since spillover area is
* only half the size:
*/
for (int i = 0; i < size; ++i) {
PNameC symbol = oldSyms[i];
if (symbol != null) {
++count;
int index = symbol.getCustomHash() & _indexMask;
if (_symbols[index] == null) {
_symbols[index] = symbol;
} else {
int bix = index >> 1;
_buckets[bix] = new Bucket(symbol, _buckets[bix]);
}
}
}
size >>= 1;
for (int i = 0; i < size; ++i) {
Bucket b = oldBuckets[i];
while (b != null) {
++count;
PNameC symbol = b.getSymbol();
int index = symbol.getCustomHash() & _indexMask;
if (_symbols[index] == null) {
_symbols[index] = symbol;
} else {
int bix = index >> 1;
_buckets[bix] = new Bucket(symbol, _buckets[bix]);
}
b = b.getNext();
}
}
if (count != _size) {
throw new Error("Internal error on SymbolTable.rehash(): had "+_size+" entries; now have "+count+".");
}
}
/*
/**********************************************************************
/* Bucket class
/**********************************************************************
*/
This class is a symbol table entry. Each entry acts as a node
in a linked list.
/**
* This class is a symbol table entry. Each entry acts as a node
* in a linked list.
*/
final static class Bucket
{
private final PNameC mSymbol;
private final Bucket mNext;
public Bucket(PNameC symbol, Bucket next)
{
mSymbol = symbol;
mNext = next;
}
public PNameC getSymbol() { return mSymbol; }
public Bucket getNext() { return mNext; }
public PNameC find(char[] buf, int start, int len, int hash)
{
Bucket b = this;
do {
PNameC sym = b.mSymbol;
if (sym.equalsPName(buf, start, len, hash)) {
return sym;
}
b = b.getNext();
} while (b != null);
return null;
}
}
}