/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.tools.jlink.internal;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import jdk.internal.jimage.ImageStringsReader;

/*
 * The algorithm used here is outlined in Applications of Finite Automata
 * Representing Large Vocabularies - Claudio L Lucchesi and Tomasz Kowaltowski,
 * 1992, and A Practical Minimal Perfect Hashing Method - Fabiano C. Botelho,
 * Yoshiharu Kohayakawa, and Nivio Ziviani, 2005.
 *
 * The primary JDK use of this algorithm is managing the jimage location index.
 *
 * The goal of PerfectHashBuilder is to construct an automaton which maps a
 * string key to a unique index 0..N-1, where N is the number of key-value pairs.
 * What makes MPHM effective is that the size of the lookup table is N or very
 * near N, and a lookup needs one hash computation at best and two at worst.
 *
 * The result of PerfectHashBuilder is two integer arrays, redirect and order.
 * The redirect table provides a 1-1 mapping to the order table, using the
 * reader algorithm described further on.  The order table provides a mapping
 * to entries.  If entries are fixed size and can be put in a direct table, then
 * the order table can be used to construct the direct table and then discarded.
 *
 * The steps for constructing the lookup tables are as follows:
 *
 *   - Compute an MPHM hash for each key, based on a fixed base value modulo N.
 *     Note, the hash is based on the modified UTF-8 of the key, simplifying
 *     computation in native code.
 *
 *   - Combine keys that map to the same hash code (collisions) into bucket
 *     chains.
 *
 *   - Sort bucket chains by length of chains, longest first (most collisions.)
 *     Sorting is done to pack the redirect table with the worst collision
 *     offenders first.
 *
 *   - For each chain, recompute the hash of each key using a new base value.
 *     Recomputation should give a different key distribution. A tally is kept
 *     of where the key maps, using the order table. The tally is used to detect
 *     new collisions. If there are further collisions, then restart
 *     redistribution using a different hash base value.  If a chain is
 *     successfully distributed, then the base value used to compute the hash
 *     is recorded in the redirect table.
 *
 *   - Once all colliding chains are resolved (length > 1), then the chains with
 *     only one entry are used to fill in the empty slots in the order table.
 *     These keys are recorded in the redirect table as -1 - index, the ones'
 *     complement of the order index, so the stored value is always negative.
 *
 *   - It is possible that a given set of keys cannot be packed into a table of
 *     size N.  If this situation occurs then the size of the table is
 *     adjusted so that keys distribute differently.
 *
 * Reader's algorithm:
 *
 *   - Compute the hash for the key using the fixed base value modulo N.  This
 *     will provide an index into the redirect table. The integer value in the
 *     redirect table will determine the next step.
 *
 *   - If the value in the redirect table is positive, then that value is used
 *     to rehash the key to get the index into the order table.
 *
 *   - If the value in the redirect table is negative, then that value is the
 *     ones' complement of the index into the order table (index = -1 - value).
 *
 *   - If the value in the redirect table is zero, then there is no matching
 *     entry.
 *
 *   - Note that the resulting entry needs to be validated to ensure a match.
 *     This is typically done by comparing the key with the key in entry.
 */
/**
 * Builds the {@code redirect} and {@code order} lookup tables for a set of
 * string-keyed entries.
 *
 * <p>Usage: add entries with {@link #put(String, Object)}, call
 * {@link #generate()}, then read the tables with {@link #getRedirect()} and
 * {@link #getOrder()}.
 *
 * <p>Encoding of a {@code redirect} slot, as written by this class:
 * <ul>
 *   <li>positive - the hash seed that spreads the colliding keys of that slot
 *       over the {@code order} table;</li>
 *   <li>negative - {@code -1 - index}, where {@code index} is the position of
 *       the single entry of that slot in the {@code order} table;</li>
 *   <li>zero - no entry hashes to that slot.</li>
 * </ul>
 *
 * @param <E> type of the value carried by each entry
 */
public class PerfectHashBuilder<E> {
    /** Maximum number of reseed attempts per bucket before the table is grown. */
    private static final int RETRY_LIMIT = 1000;

    /** Clears the sign bit so that a hash can be reduced to an array index. */
    private static final int POSITIVE_MASK = 0x7FFFFFFF;

    /** Runtime component type used to allocate the {@code order} array. */
    private final Class<?> entryComponent;
    /** Runtime component type used to allocate bucket arrays. */
    private final Class<?> bucketComponent;

    /** Entries by key, in insertion order. */
    private final Map<String, Entry<E>> map = new LinkedHashMap<>();
    private int[] redirect;
    private Entry<E>[] order;
    /**
     * Current table size. Incremented for every new key and enlarged by
     * {@link #generate()} whenever packing fails, so it may exceed
     * {@code map.size()}.
     */
    private int count = 0;

    /**
     * A key/value pair stored in the table. Equality and hashing are based on
     * the key only.
     */
    @SuppressWarnings("EqualsAndHashcode")
    public static class Entry<E> {
        private final String key;
        private final E value;

        /** Creates an entry with an empty key and a {@code null} value. */
        Entry() {
            this("", null);
        }

        Entry(String key, E value) {
            this.key = key;
            this.value = value;
        }

        String getKey() {
            return key;
        }

        E getValue() {
            return value;
        }

        /** Returns the hash of the key computed with the given seed. */
        int hashCode(int seed) {
            return ImageStringsReader.hashCode(key, seed);
        }

        /** Returns the hash of the key computed by {@code ImageStringsReader}. */
        @Override
        public int hashCode() {
            return ImageStringsReader.hashCode(key);
        }

        /** Two entries are equal when their keys are equal. */
        @Override
        public boolean equals(Object other) {
            if (other == this) {
                return true;
            }
            if (!(other instanceof Entry)) {
                return false;
            }
            Entry<?> entry = (Entry<?>) other;
            return entry.key.equals(key);
        }
    }

    /**
     * A chain of entries whose keys hash to the same slot. Buckets order
     * themselves longest first; equality is identity.
     */
    static class Bucket<E> implements Comparable<Bucket<E>> {
        final List<Entry<E>> list = new ArrayList<>();

        void add(Entry<E> entry) {
            list.add(entry);
        }

        int getSize() {
            return list.size();
        }

        List<Entry<E>> getList() {
            return list;
        }

        Entry<E> getFirst() {
            assert !list.isEmpty() : "bucket should never be empty";
            return list.get(0);
        }

        /** Returns the hash of the first entry in the chain. */
        @Override
        public int hashCode() {
            return getFirst().hashCode();
        }

        @Override
        @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
        public boolean equals(Object obj) {
            return this == obj;
        }

        /** Orders buckets by descending size (longest chain first). */
        @Override
        public int compareTo(Bucket<E> o) {
            return Integer.compare(o.getSize(), getSize());
        }
    }

    /**
     * @param entryComponent  component type for the {@code order} array
     * @param bucketComponent component type for the internal bucket arrays
     */
    public PerfectHashBuilder(Class<?> entryComponent, Class<?> bucketComponent) {
        this.entryComponent = entryComponent;
        this.bucketComponent = bucketComponent;
    }

    /**
     * Returns the number of distinct keys added so far. This is not the
     * table length, which can be larger once {@link #generate()} has grown it.
     */
    public int getCount() {
        return map.size();
    }

    /**
     * Returns a copy of the redirect table.
     *
     * @throws NullPointerException if {@link #generate()} has not been called
     */
    public int[] getRedirect() {
        return redirect.clone();
    }

    /**
     * Returns a copy of the order table. Slots that received no entry are
     * {@code null}.
     *
     * @throws NullPointerException if {@link #generate()} has not been called
     */
    public Entry<E>[] getOrder() {
        return order.clone();
    }

    /**
     * Adds or replaces the entry for {@code key}.
     *
     * @return the entry previously stored under {@code key}, or {@code null}
     */
    public Entry<E> put(String key, E value) {
        return put(new Entry<>(key, value));
    }

    /**
     * Adds or replaces an entry, keyed by the entry's own key.
     *
     * @return the entry previously stored under the same key, or {@code null}
     */
    public Entry<E> put(Entry<E> entry) {
        Entry<E> old = map.put(entry.key, entry);

        // Only a brand new key enlarges the table.
        if (old == null) {
            count++;
        }

        return old;
    }

    /**
     * Builds the redirect and order tables from the entries added so far.
     * When packing fails at the current size, the table is enlarged and the
     * whole packing is restarted. An empty builder yields zero-length tables.
     */
    @SuppressWarnings("unchecked")
    public void generate() {
        // Nothing to pack: publish empty tables so the accessors still work.
        if (count == 0) {
            redirect = new int[0];
            order = (Entry<E>[])Array.newInstance(entryComponent, 0);
            return;
        }

        // Repeat until a valid packing is achieved.
        boolean redo;
        do {
            redo = false;

            // Allocate the resulting redirect and order tables.
            redirect = new int[count];
            order = (Entry<E>[])Array.newInstance(entryComponent, count);

            // Place all the entries in bucket chains based on hash. Sort by
            // length of chain.
            Bucket<E>[] sorted = createBuckets();
            int free = 0;

            // Iterate through the chains, longest first.
            for (Bucket<E> bucket : sorted) {
                if (bucket.getSize() != 1) {
                    // Attempt to pack entries until no collisions occur.
                    if (!collidedEntries(bucket, count)) {
                        // Failed to pack. Need to grow table.
                        redo = true;
                        break;
                    }
                } else {
                    // A no collision entry (bucket.getSize() == 1). Find a free
                    // spot in the order table.
                    while (free < count && order[free] != null) {
                        free++;
                    }

                    // If none found, then grow table.
                    if (free >= count) {
                        redo = true;
                        break;
                    }

                    // Store entry in order table.
                    order[free] = bucket.getFirst();
                    // Store -1 - index so that the redirect value is always
                    // negative, even for order index zero.
                    redirect[slot(bucket.hashCode(), count)] = -1 - free;
                    // Update free slot index.
                    free++;
                }
            }

            // If packing failed, then bump table size. Make odd to increase
            // chances of being relatively prime.
            if (redo) {
                count = (count + 1) | 1;
            }
        } while (redo);
    }

    /**
     * Reduces a hash to an index in {@code [0, size)}. The sign bit is cleared
     * first so that the remainder can never be negative; this is a no-op for
     * hashes that are already non-negative.
     */
    private static int slot(int hash, int size) {
        return (hash & POSITIVE_MASK) % size;
    }

    /**
     * Groups all entries into buckets by their unseeded hash and returns the
     * non-empty buckets sorted longest chain first.
     */
    @SuppressWarnings("unchecked")
    private Bucket<E>[] createBuckets() {
        // Build bucket chains based on key hash.  Collisions end up in same chain.
        Bucket<E>[] buckets = (Bucket<E>[])Array.newInstance(bucketComponent, count);

        for (Entry<E> entry : map.values()) {
            int index = slot(entry.hashCode(), count);
            Bucket<E> bucket = buckets[index];

            if (bucket == null) {
                buckets[index] = bucket = new Bucket<>();
            }

            bucket.add(entry);
        }

        // Drop unused slots and sort chains, longest first.
        return Arrays.stream(buckets)
                .filter((bucket) -> bucket != null)
                .sorted()
                .toArray((length) ->
                        (Bucket<E>[])Array.newInstance(bucketComponent, length));
    }

    /**
     * Tries to place every entry of a colliding bucket into a distinct free
     * slot of the order table by rehashing with successive seeds. On success
     * the seed is recorded in the redirect table.
     *
     * @param bucket chain of entries sharing the same unseeded slot
     * @param count  current table size
     * @return {@code true} if the chain was packed, {@code false} if
     *         {@link #RETRY_LIMIT} collisions occurred at this table size
     */
    private boolean collidedEntries(Bucket<E> bucket, int count) {
        // Order slots claimed during the current attempt.
        List<Integer> undo = new ArrayList<>();
        // Start with a new hash seed.
        int seed = ImageStringsReader.HASH_MULTIPLIER + 1;
        int retry = 0;

        // Attempt to pack all the entries in a single chain.
        redo:
        while (true) {
            for (Entry<E> entry : bucket.getList()) {
                // Compute new hash, masked like every other table index.
                int index = slot(entry.hashCode(seed), count);

                // If a collision is detected.
                if (order[index] != null) {
                    // Only retry so many times with current table size.
                    if (++retry > RETRY_LIMIT) {
                        return false;
                    }

                    // Undo the attempted packing.
                    for (int claimed : undo) {
                        order[claimed] = null;
                    }

                    // Reset the undo list and bump up the hash seed.
                    undo.clear();
                    seed++;

                    // Zero seed is not valid.
                    if (seed == 0) {
                        seed = 1;
                    }

                    // Try again.
                    continue redo;
                }

                // No collision.
                order[index] = entry;
                undo.add(index);
            }

            // Entire chain packed. Record hash seed used.
            redirect[slot(bucket.hashCode(), count)] = seed;

            break;
        }

        return true;
    }
}