/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.catalina.webresources;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

This class represents the contents of a jar by determining whether a given resource might be in the cache, based on a bloom filter. This is not a general-purpose bloom filter because it contains logic to strip out characters from the beginning of the key. The hash methods are simple but good enough for this purpose.
/** * This class represents the contents of a jar by determining whether a given * resource <b>might</b> be in the cache, based on a bloom filter. This is not a * general-purpose bloom filter because it contains logic to strip out * characters from the beginning of the key. * * The hash methods are simple but good enough for this purpose. */
public final class JarContents { private final BitSet bits1; private final BitSet bits2;
Constant used by a typical hashing method.
/** * Constant used by a typical hashing method. */
private static final int HASH_PRIME_1 = 31;
Constant used by a typical hashing method.
/** * Constant used by a typical hashing method. */
private static final int HASH_PRIME_2 = 17;
Size of the fixed-length bit table. Larger reduces false positives, smaller saves memory.
/** * Size of the fixed-length bit table. Larger reduces false positives, * smaller saves memory. */
private static final int TABLE_SIZE = 2048;
Parses the passed-in jar and populates the bit array.
Params:
  • jar – the JAR file
/** * Parses the passed-in jar and populates the bit array. * * @param jar the JAR file */
public JarContents(JarFile jar) { Enumeration<JarEntry> entries = jar.entries(); bits1 = new BitSet(TABLE_SIZE); bits2 = new BitSet(TABLE_SIZE); // Enumerations. When will they update this API?! while (entries.hasMoreElements()) { JarEntry entry = entries.nextElement(); String name = entry.getName(); int startPos = 0; // If the path starts with a slash, that's not useful information. // Skipping it increases the significance of our key by // removing an insignificant character. boolean precedingSlash = name.charAt(0) == '/'; if (precedingSlash) { startPos = 1; } // Find the correct table slot int pathHash1 = hashcode(name, startPos, HASH_PRIME_1); int pathHash2 = hashcode(name, startPos, HASH_PRIME_2); bits1.set(pathHash1 % TABLE_SIZE); bits2.set(pathHash2 % TABLE_SIZE); } }
Simple hashcode of a portion of the string. Typically we would use substring, but memory and runtime speed are critical.
Params:
  • content – Wrapping String.
  • startPos – First character in the range.
Returns:hashcode of the range.
/** * Simple hashcode of a portion of the string. Typically we would use * substring, but memory and runtime speed are critical. * * @param content * Wrapping String. * @param startPos * First character in the range. * @return hashcode of the range. */
private int hashcode(String content, int startPos, int hashPrime) { int h = hashPrime/2; int contentLength = content.length(); for (int i = startPos; i < contentLength; i++) { h = hashPrime * h + content.charAt(i); } if (h < 0) { h = h * -1; } return h; }
Method that identifies whether a given path MIGHT be in this jar. Uses the Bloom filter mechanism.
Params:
  • path – Requested path. Sometimes starts with "/WEB-INF/classes".
  • webappRoot – The value of the webapp location, which can be stripped from the path. Typically is "/WEB-INF/classes".
Returns:Whether the prefix of the path is known to be in this jar.
/** * Method that identifies whether a given path <b>MIGHT</b> be in this jar. * Uses the Bloom filter mechanism. * * @param path * Requested path. Sometimes starts with "/WEB-INF/classes". * @param webappRoot * The value of the webapp location, which can be stripped from * the path. Typically is "/WEB-INF/classes". * @return Whether the prefix of the path is known to be in this jar. */
public final boolean mightContainResource(String path, String webappRoot) { int startPos = 0; if (path.startsWith(webappRoot)) { startPos = webappRoot.length(); } if (path.charAt(startPos) == '/') { // ignore leading slash startPos++; } // calculate the hash lazyly and return a boolean value for this path return (bits1.get(hashcode(path, startPos, HASH_PRIME_1) % TABLE_SIZE) && bits2.get(hashcode(path, startPos, HASH_PRIME_2) % TABLE_SIZE)); } }