java/10 : jdk.rmic/sun/rmi/rmic/iiop/StaticStringsHash.java

StaticStringsHash
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * Licensed Materials - Property of IBM
 * RMI-IIOP v1.0
 * Copyright IBM Corp. 1998 1999  All Rights Reserved
 *
 */

package sun.rmi.rmic.iiop;

StaticStringsHash takes an array of constant strings and
uses several different hash methods to try to find the
'best' one for that set. The set of methods is currently
fixed, but with a little work could be made extensible thru
subclassing.

The current set of methods is:

 length() - works well when all strings are different length.
 charAt(n) - works well when one offset into all strings is different.
 hashCode() - works well with larger arrays.

After constructing an instance over the set of strings, the
getKey(String) method can be used to use the selected hash
method to produce a key.  The method string will contain
"length()", "charAt(n)", or "hashCode()", and is intended for use by
code generators.

The keys array will contain the full set of unique keys.

The buckets array will contain a set of arrays, one for
each key in the keys, where buckets[x][y]
is an index into the strings array.
Author:      Bryan Atsatt/**
 * StaticStringsHash takes an array of constant strings and
 * uses several different hash methods to try to find the
 * 'best' one for that set. The set of methods is currently
 * fixed, but with a little work could be made extensible thru
 * subclassing.
 * <p>
 * The current set of methods is:
 * <ol>
 * <li> length() - works well when all strings are different length.</li>
 * <li> charAt(n) - works well when one offset into all strings is different.</li>
 * <li> hashCode() - works well with larger arrays.</li>
 * </ol>
 * After constructing an instance over the set of strings, the
 * <code>getKey(String)</code> method can be used to use the selected hash
 * method to produce a key.  The <code>method</code> string will contain
 * "length()", "charAt(n)", or "hashCode()", and is intended for use by
 * code generators.
 * <p>
 * The <code>keys</code> array will contain the full set of unique keys.
 * <p>
 * The <code>buckets</code> array will contain a set of arrays, one for
 * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
 * is an index into the <code>strings</code> array.
 * @author      Bryan Atsatt
 */
public class StaticStringsHash {

    The set of strings upon which the hash info is created /** The set of strings upon which the hash info is created */
    public String[] strings = null;

    Unique hash keys /** Unique hash keys */
    public int[] keys = null;

    Buckets for each key, where buckets[x][y] is an index
into the strings[] array. /** Buckets for each key, where buckets[x][y] is an index
     * into the strings[] array. */
    public int[][] buckets = null;

    The method to invoke on String to produce the hash key /** The method to invoke on String to produce the hash key */
    public String method = null;

    Get a key for the given string using the
selected hash method.
Params: str – the string to return a key for.
Returns: the key./** Get a key for the given string using the
     * selected hash method.
     * @param str the string to return a key for.
     * @return the key.
     */
    public int getKey(String str) {
        switch (keyKind) {
        case LENGTH: return str.length();
        case CHAR_AT: return str.charAt(charAt);
        case HASH_CODE: return str.hashCode();
        }
        throw new Error("Bad keyKind");
    }

    Constructor
Params: strings – the set of strings upon which to
find an optimal hash method. Must not contain
duplicates./** Constructor
     * @param strings the set of strings upon which to
     * find an optimal hash method. Must not contain
     * duplicates.
     */
    public StaticStringsHash(String[] strings) {
        this.strings = strings;
        length = strings.length;
        tempKeys = new int[length];
        bucketSizes = new int[length];
        setMinStringLength();

        // Decide on the best algorithm based on
        // which one has the smallest maximum
        // bucket depth. First, try length()...

        int currentMaxDepth = getKeys(LENGTH);
        int useCharAt = -1;
        boolean useHashCode = false;

        if (currentMaxDepth > 1) {

            // At least one bucket had more than one
            // entry, so try charAt(i).  If there
            // are a lot of strings in the array,
            // and minStringLength is large, limit
            // the search to a smaller number of
            // characters to avoid spending a lot
            // of time here that is most likely to
            // be pointless...

            int minLength = minStringLength;
            if (length > CHAR_AT_MAX_LINES &&
                length * minLength > CHAR_AT_MAX_CHARS) {
                minLength = length/CHAR_AT_MAX_CHARS;
            }

            charAt = 0;
            for (int i = 0; i < minLength; i++) {
                int charAtDepth = getKeys(CHAR_AT);
                if (charAtDepth < currentMaxDepth) {
                    currentMaxDepth = charAtDepth;
                    useCharAt = i;
                    if (currentMaxDepth == 1) {
                        break;
                    }
                }
                charAt++;
            }
            charAt = useCharAt;


            if (currentMaxDepth > 1) {

                // At least one bucket had more than one
                // entry, try hashCode().
                //
                // Since the cost of computing a full hashCode
                // (for the runtime target string) is much higher
                // than the previous methods, use it only if it is
                // substantially better. The definition of 'substantial'
                // here is not very well founded, and could be improved
                // with some further analysis ;^)

                int hashCodeDepth = getKeys(HASH_CODE);
                if (hashCodeDepth < currentMaxDepth-3) {

                    // Using the full hashCode results in at least
                    // 3 fewer entries in the worst bucket, so will
                    // therefore avoid at least 3 calls to equals()
                    // in the worst case.
                    //
                    // Note that using a number smaller than 3 could
                    // result in using a hashCode when there are only
                    // 2 strings in the array, and that would surely
                    // be a poor performance choice.

                    useHashCode = true;
                }
            }

            // Reset keys if needed...

            if (!useHashCode) {
                if (useCharAt >= 0) {

                    // Use the charAt(i) method...

                    getKeys(CHAR_AT);

                } else {

                    // Use length method...

                    getKeys(LENGTH);
                }
            }
        }

        // Now allocate and fill our real hashKeys array...

        keys = new int[bucketCount];
        System.arraycopy(tempKeys,0,keys,0,bucketCount);

        // Sort keys and bucketSizes arrays...

        boolean didSwap;
        do {
            didSwap = false;
            for (int i = 0; i < bucketCount - 1; i++) {
                if (keys[i] > keys[i+1]) {
                    int temp = keys[i];
                    keys[i] = keys[i+1];
                    keys[i+1] = temp;
                    temp = bucketSizes[i];
                    bucketSizes[i] = bucketSizes[i+1];
                    bucketSizes[i+1] = temp;
                    didSwap = true;
                }
            }
        }
        while (didSwap == true);

        // Allocate our buckets array. Fill the string
        // index slot with an unused key so we can
        // determine which are free...

        int unused = findUnusedKey();
        buckets = new int[bucketCount][];
        for (int i = 0; i < bucketCount; i++) {
            buckets[i] = new int[bucketSizes[i]];
            for (int j = 0; j < bucketSizes[i]; j++) {
                buckets[i][j] = unused;
            }
        }

        // And fill it in...

        for(int i = 0; i < strings.length; i++) {
            int key = getKey(strings[i]);
            for (int j = 0; j < bucketCount; j++) {
                if (keys[j] == key) {
                    int k = 0;
                    while (buckets[j][k] != unused) {
                        k++;
                    }
                    buckets[j][k] = i;
                    break;
                }
            }
        }
    }

    Print an optimized 'contains' method for the
argument strings
/** Print an optimized 'contains' method for the
     * argument strings
     */
    public static void main (String[] args) {
        StaticStringsHash hash = new StaticStringsHash(args);
        System.out.println();
        System.out.println("    public boolean contains(String key) {");
        System.out.println("        switch (key."+hash.method+") {");
        for (int i = 0; i < hash.buckets.length; i++) {
            System.out.println("            case "+hash.keys[i]+": ");
            for (int j = 0; j < hash.buckets[i].length; j++) {
                if (j > 0) {
                    System.out.print("                } else ");
                } else {
                    System.out.print("                ");
                }
                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
                System.out.println("                    return true;");
            }
            System.out.println("                }");
        }
        System.out.println("        }");
        System.out.println("        return false;");
        System.out.println("    }");
    }

    private int length;
    private int[] tempKeys;
    private int[] bucketSizes;
    private int bucketCount;
    private int maxDepth;
    private int minStringLength = Integer.MAX_VALUE;
    private int keyKind;
    private int charAt;

    private static final int LENGTH = 0;
    private static final int CHAR_AT = 1;
    private static final int HASH_CODE = 2;

    /* Determines the maximum number of charAt(i)
     * tests that will be done. The search is
     * limited because if the number of characters
     * is large enough, the likelyhood of finding
     * a good hash key  based on this method is
     * low. The CHAR_AT_MAX_CHARS limit only
     * applies f there are more strings than
     * CHAR_AT_MAX_LINES.
     */
    private static final int CHAR_AT_MAX_LINES = 50;
    private static final int CHAR_AT_MAX_CHARS = 1000;

    private void resetKeys(int keyKind) {
        this.keyKind = keyKind;
        switch (keyKind) {
        case LENGTH: method = "length()"; break;
        case CHAR_AT: method = "charAt("+charAt+")"; break;
        case HASH_CODE: method = "hashCode()"; break;
        }
        maxDepth = 1;
        bucketCount = 0;
        for (int i = 0; i < length; i++) {
            tempKeys[i] = 0;
            bucketSizes[i] = 0;
        }
    }

    private void setMinStringLength() {
        for (int i = 0; i < length; i++) {
            if (strings[i].length() < minStringLength) {
                minStringLength = strings[i].length();
            }
        }
    }

    private int findUnusedKey() {
        int unused = 0;
        int keysLength = keys.length;

        // Note that we just assume that resource
        // exhaustion will occur rather than an
        // infinite loop here if the set of keys
        // is very large.

        while (true) {
            boolean match = false;
            for (int i = 0; i < keysLength; i++) {
                if (keys[i] == unused) {
                    match = true;
                    break;
                }
            }
            if (match) {
                unused--;
            } else {
                break;
            }
        }
        return unused;
    }

    private int getKeys(int methodKind) {
        resetKeys(methodKind);
        for(int i = 0; i < strings.length; i++) {
            addKey(getKey(strings[i]));
        }
        return maxDepth;
    }

    private void addKey(int key) {

        // Have we seen this one before?

        boolean addIt = true;
        for (int j = 0; j < bucketCount; j++) {
            if (tempKeys[j] == key) {
                addIt = false;
                bucketSizes[j]++;
                if (bucketSizes[j] > maxDepth) {
                    maxDepth = bucketSizes[j];
                }
                break;
            }
        }

        if (addIt) {
            tempKeys[bucketCount] = key;
            bucketSizes[bucketCount] = 1;
            bucketCount++;
        }
    }
}
Params:	str – the string to return a key for.
Returns:	the key.
/

java/ 10/ jdk.rmic/sun/rmi/rmic/iiop/StaticStringsHash.java