// Copyright (c) 2000, 2018 IBM Corporation and others. This program and the accompanying materials are made available under the terms of the Eclipse Public License 2.0 which accompanies this distribution, and is available at https://www.eclipse.org/legal/epl-2.0/ SPDX-License-Identifier: EPL-2.0 Contributors: IBM Corporation - initial API and implementation
/******************************************************************************* * Copyright (c) 2000, 2018 IBM Corporation and others. * * This program and the accompanying materials * are made available under the terms of the Eclipse Public License 2.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/legal/epl-2.0/ * * SPDX-License-Identifier: EPL-2.0 * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/
package org.eclipse.jdt.internal.core.index;

import java.io.*;
import java.util.regex.Pattern;

import org.eclipse.jdt.core.compiler.CharOperation;
import org.eclipse.jdt.core.search.*;
import org.eclipse.jdt.internal.core.util.*;
import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues;
import org.eclipse.jdt.internal.compiler.util.HashtableOfObject;
import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable;
import org.eclipse.jdt.internal.compiler.util.SimpleSet;
import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray;

/**
 * On-disk representation of a search index. The file layout (as far as this chunk
 * shows) is: a signature string, an int offset to the header info, CHUNK_SIZE-sized
 * chunks of delta-encoded document names, per-category word tables mapping words to
 * document-number arrays, and finally the header info (chunk offsets, category
 * offsets). Reading is done through a shared {@link #streamBuffer} whose cursor is
 * {@link #bufferIndex}/{@link #bufferEnd}; several helpers that operate on that
 * buffer ({@code readStreamChars}, {@code readStreamInt},
 * {@code readStreamDocumentArray}, {@code readStreamBuffer}) and all the
 * {@code write*} counterparts are defined elsewhere in this file.
 * <p>
 * NOTE(review): most read methods are {@code synchronized} on this instance, but
 * {@code BUFFER_READ_SIZE} is a mutable <em>static</em> that
 * {@link #cacheDocumentNames()} temporarily doubles — presumably safe only because
 * callers serialize access externally; confirm against the index manager's locking.
 */
public class DiskIndex {

    /** Location of the backing index file. */
    IndexLocation indexLocation;

    // --- values read from / written to the file header ---
    private int headerInfoOffset;
    private int numberOfChunks;
    private int sizeOfLastChunk;
    private int[] chunkOffsets; // file offset of each document-name chunk
    private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
    private int startOfCategoryTables; // file offset where category tables begin (also end of last name chunk)
    private HashtableOfIntValues categoryOffsets, categoryEnds; // category name -> start/end offset of its table

    // --- query-time caches, managed by startQuery()/stopQuery() ---
    private int cacheUserCount; // -1 when no query is active; incremented per startQuery()
    private String[][] cachedChunks; // decompressed chunks of document names
    private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet
    private char[] cachedCategoryName; // the one category table worth retaining after the last stopQuery()

    private static final int DEFAULT_BUFFER_SIZE = 2048;
    private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE; // NOTE(review): mutable static, see class comment
    private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE; // used by the write methods (not shown in this chunk)
    private byte[] streamBuffer;
    private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer
    private int streamEnd; // used when writing data from the streamBuffer to the file

    /** Separator between document-name segments; persisted in the header. */
    char separator = Index.DEFAULT_SEPARATOR;

    // File signature; bump the version when the format changes.
    public static final String SIGNATURE= "INDEX VERSION 1.131"; //$NON-NLS-1$
    private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray();
    public static boolean DEBUG = false;

    // Sentinel values used in the positions[] mapping built by computeDocumentNames().
    private static final int RE_INDEXED = -1;
    private static final int DELETED = -2;

    /** Number of document names stored per chunk (last chunk may be smaller). */
    private static final int CHUNK_SIZE = 100;

    // Interning pool so equal category names share one char[] across indexes.
    private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20);

    private static final String TMP_EXT = ".tmp"; //$NON-NLS-1$

    /** Growable int array used while merging document-number lists. */
    static class IntList {

        int size;
        int[] elements;

        IntList(int[] elements) {
            this.elements = elements;
            this.size = elements.length;
        }
        void add(int newElement) {
            if (this.size == this.elements.length) {
                // grow by 3x, with a small minimum capacity
                int newSize = this.size * 3;
                if (newSize < 7) newSize = 7;
                System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size);
            }
            this.elements[this.size++] = newElement;
        }
        int[] asArray() {
            // trim to exact size
            int[] result = new int[this.size];
            System.arraycopy(this.elements, 0, result, 0, this.size);
            return result;
        }
    }

    /** Creates an uninitialized index; all cached state is reset to "not read yet". */
    DiskIndex() {
        this.headerInfoOffset = -1;
        this.numberOfChunks = -1;
        this.sizeOfLastChunk = -1;
        this.chunkOffsets = null;
        this.documentReferenceSize = -1;
        this.cacheUserCount = -1;
        this.cachedChunks = null;
        this.categoryTables = null;
        this.cachedCategoryName = null;
        this.categoryOffsets = null;
        this.categoryEnds = null;
    }
    /**
     * Creates an index bound to the given location.
     *
     * @param location where the index file lives; must not be null
     * @throws IllegalArgumentException if location is null
     */
    DiskIndex(IndexLocation location) throws IOException {
        this();
        if (location == null) {
            throw new IllegalArgumentException();
        }
        this.indexLocation = location;
    }
    /**
     * Returns the set of document names in this index, optionally restricted to those
     * starting with <code>substring</code>, excluding documents that the given memory
     * index has added/changed/deleted (their current state lives in memory, not here).
     *
     * @param substring  prefix filter, or null for all documents
     * @param memoryIndex in-memory overlay to skip, or null
     */
    SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException {
        // must skip over documents which have been added/changed/deleted in the memory index
        String[] docNames = readAllDocumentNames();
        SimpleSet results = new SimpleSet(docNames.length);
        if (substring == null) {
            if (memoryIndex == null) {
                for (int i = 0, l = docNames.length; i < l; i++)
                    results.add(docNames[i]);
            } else {
                SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
                for (int i = 0, l = docNames.length; i < l; i++) {
                    String docName = docNames[i];
                    if (!docsToRefs.containsKey(docName))
                        results.add(docName);
                }
            }
        } else {
            if (memoryIndex == null) {
                for (int i = 0, l = docNames.length; i < l; i++)
                    if (docNames[i].startsWith(substring, 0))
                        results.add(docNames[i]);
            } else {
                SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
                for (int i = 0, l = docNames.length; i < l; i++) {
                    String docName = docNames[i];
                    if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName))
                        results.add(docName);
                }
            }
        }
        return results;
    }
    /**
     * Folds one (word -> docs) pair into <code>results</code>. When a memory index is
     * present, document numbers are resolved to names so documents overridden in
     * memory can be skipped. <code>prevResults</code> is true once an earlier category
     * may already have contributed an entry for this word (enables the merge path).
     *
     * @param docs either an int[] of document numbers or an Integer file offset
     *             (see readDocumentNumbers)
     */
    private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, Object docs, MemoryIndex memoryIndex, boolean prevResults) throws IOException {
        // must skip over documents which have been added/changed/deleted in the memory index
        if (results == null)
            results = new HashtableOfObject(13);
        EntryResult result = prevResults ? (EntryResult) results.get(word) : null;
        if (memoryIndex == null) {
            if (result == null)
                results.putUnsafely(word, new EntryResult(word, docs));
            else
                result.addDocumentTable(docs);
        } else {
            SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
            if (result == null) result = new EntryResult(word, null);
            int[] docNumbers = readDocumentNumbers(docs);
            for (int i = 0, l = docNumbers.length; i < l; i++) {
                String docName = readDocumentName(docNumbers[i]);
                if (!docsToRefs.containsKey(docName))
                    result.addDocumentName(docName);
            }
            if (!result.isEmpty())
                results.put(word, result);
        }
        return results;
    }
    /**
     * Answers the query results for <code>key</code> (or all words if key is null)
     * across the given categories, honoring <code>matchRule</code>
     * (exact/prefix/regexp/other via Index.isMatch). Returns null if nothing matched
     * or the file is empty.
     */
    HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException {
        // assumes sender has called startQuery() & will call stopQuery() when finished
        if (this.categoryOffsets == null) return null; // file is empty

        HashtableOfObject results = null; // initialized if needed

        // No need to check the results table for duplicates while processing the
        // first category table or if the first category tables doesn't have any results.
        boolean prevResults = false;
        if (key == null) {
            for (int i = 0, l = categories.length; i < l; i++) {
                HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); // cache if key is null since its a definite match
                if (wordsToDocNumbers != null) {
                    char[][] words = wordsToDocNumbers.keyTable;
                    Object[] values = wordsToDocNumbers.valueTable;
                    if (results == null)
                        results = new HashtableOfObject(wordsToDocNumbers.elementSize);
                    for (int j = 0, m = words.length; j < m; j++)
                        if (words[j] != null)
                            results = addQueryResult(results, words[j], values[j], memoryIndex, prevResults);
                }
                prevResults = results != null;
            }
            if (results != null && this.cachedChunks == null)
                cacheDocumentNames();
        } else {
            switch (matchRule) {
                case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
                    for (int i = 0, l = categories.length; i < l; i++) {
                        HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
                        Object value;
                        if (wordsToDocNumbers != null && (value = wordsToDocNumbers.get(key)) != null)
                            results = addQueryResult(results, key, value, memoryIndex, prevResults);
                        prevResults = results != null;
                    }
                    break;
                case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
                    for (int i = 0, l = categories.length; i < l; i++) {
                        HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
                        if (wordsToDocNumbers != null) {
                            char[][] words = wordsToDocNumbers.keyTable;
                            Object[] values = wordsToDocNumbers.valueTable;
                            for (int j = 0, m = words.length; j < m; j++) {
                                char[] word = words[j];
                                // cheap first-char check before the full prefix comparison
                                if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word))
                                    results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
                            }
                        }
                        prevResults = results != null;
                    }
                    break;
                case SearchPattern.R_REGEXP_MATCH:
                    Pattern pattern = Pattern.compile(new String(key));
                    for (int i = 0, l = categories.length; i < l; i++) {
                        HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
                        if (wordsToDocNumbers != null) {
                            char[][] words = wordsToDocNumbers.keyTable;
                            Object[] values = wordsToDocNumbers.valueTable;
                            for (int j = 0, m = words.length; j < m; j++) {
                                char[] word = words[j];
                                if (word != null && pattern.matcher(new String(word)).matches())
                                    results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
                            }
                        }
                        prevResults = results != null;
                    }
                    break;
                default:
                    // all other match rules are delegated to Index.isMatch
                    for (int i = 0, l = categories.length; i < l; i++) {
                        HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
                        if (wordsToDocNumbers != null) {
                            char[][] words = wordsToDocNumbers.keyTable;
                            Object[] values = wordsToDocNumbers.valueTable;
                            for (int j = 0, m = words.length; j < m; j++) {
                                char[] word = words[j];
                                if (word != null && Index.isMatch(key, word, matchRule))
                                    results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
                            }
                        }
                        prevResults = results != null;
                    }
            }
        }
        return results;
    }
    /**
     * Reads and caches every document-name chunk into {@link #cachedChunks}.
     * Temporarily doubles the static BUFFER_READ_SIZE for large indexes and restores
     * it in the finally block (NOTE(review): not thread-safe per se — see class comment).
     * On IOException the partial cache is discarded.
     */
    private void cacheDocumentNames() throws IOException {
        // will need all document names so get them now
        this.cachedChunks = new String[this.numberOfChunks][];
        InputStream stream = this.indexLocation.getInputStream();
        try {
            if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1;
            int offset = this.chunkOffsets[0];
            stream.skip(offset);
            this.streamBuffer = new byte[BUFFER_READ_SIZE];
            this.bufferIndex = 0;
            this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
            for (int i = 0; i < this.numberOfChunks; i++) {
                int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE;
                readChunk(this.cachedChunks[i] = new String[size], stream, 0, size);
            }
        } catch (IOException e) {
            this.cachedChunks = null;
            throw e;
        } finally {
            stream.close();
            this.indexLocation.close();
            this.streamBuffer = null;
            BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
        }
    }
    /**
     * Computes the sorted document-name list for the merged index and, as side
     * effects: fills <code>positions</code> mapping each on-disk position to its new
     * position (or DELETED / RE_INDEXED), and records each new/changed document with
     * its new position in <code>indexedDocuments</code>.
     *
     * @return the new sorted array of document names
     */
    private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) {
        int onDiskLength = onDiskNames.length;
        Object[] docNames = memoryIndex.docsToReferences.keyTable;
        Object[] referenceTables = memoryIndex.docsToReferences.valueTable;
        if (onDiskLength == 0) {
            // disk index was empty, so add every indexed document
            for (int i = 0, l = referenceTables.length; i < l; i++)
                if (referenceTables[i] != null)
                    indexedDocuments.put(docNames[i], null); // remember each new document

            String[] newDocNames = new String[indexedDocuments.elementSize];
            int count = 0;
            Object[] added = indexedDocuments.keyTable;
            for (int i = 0, l = added.length; i < l; i++)
                if (added[i] != null)
                    newDocNames[count++] = (String) added[i];
            Util.sort(newDocNames);
            for (int i = 0, l = newDocNames.length; i < l; i++)
                indexedDocuments.put(newDocNames[i], Integer.valueOf(i));
            return newDocNames;
        }

        // initialize positions as if each document will remain in the same position
        for (int i = 0; i < onDiskLength; i++)
            positions[i] = i;

        // find out if the memory index has any new or deleted documents, if not then the names & positions are the same
        int numDeletedDocNames = 0;
        nextPath : for (int i = 0, l = docNames.length; i < l; i++) {
            String docName = (String) docNames[i];
            if (docName != null) {
                for (int j = 0; j < onDiskLength; j++) {
                    if (docName.equals(onDiskNames[j])) {
                        // null reference table means the document was deleted in memory
                        if (referenceTables[i] == null) {
                            positions[j] = DELETED;
                            numDeletedDocNames++;
                        } else {
                            positions[j] = RE_INDEXED;
                        }
                        continue nextPath;
                    }
                }
                if (referenceTables[i] != null)
                    indexedDocuments.put(docName, null); // remember each new document, skip deleted documents which were never saved
            }
        }

        String[] newDocNames = onDiskNames;
        if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
            // some new documents have been added or some old ones deleted
            newDocNames = new String[onDiskLength + indexedDocuments.elementSize - numDeletedDocNames];
            int count = 0;
            for (int i = 0; i < onDiskLength; i++)
                if (positions[i] >= RE_INDEXED)
                    newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
            Object[] added = indexedDocuments.keyTable;
            for (int i = 0, l = added.length; i < l; i++)
                if (added[i] != null)
                    newDocNames[count++] = (String) added[i]; // add each new document
            Util.sort(newDocNames);
            for (int i = 0, l = newDocNames.length; i < l; i++)
                if (indexedDocuments.containsKey(newDocNames[i]))
                    indexedDocuments.put(newDocNames[i], Integer.valueOf(i)); // remember the position for each new document
        }

        // need to be able to look up an old position (ref# from a ref[]) and map it to its new position
        // if its old position == DELETED then its forgotton
        // if its old position == ReINDEXED then its also forgotten but its new position is needed to map references
        int count = -1;
        for (int i = 0; i < onDiskLength;) {
            switch(positions[i]) {
                case DELETED :
                    i++; // skip over deleted... references are forgotten
                    break;
                case RE_INDEXED :
                    String newName = newDocNames[++count];
                    if (newName.equals(onDiskNames[i])) {
                        indexedDocuments.put(newName, Integer.valueOf(count)); // the reindexed docName that was at position i is now at position count
                        i++;
                    }
                    break;
                default :
                    if (newDocNames[++count].equals(onDiskNames[i]))
                        positions[i++] = count; // the unchanged docName that was at position i is now at position count
            }
        }
        return newDocNames;
    }
    /**
     * Copies one document's category->words results (from the memory index) into
     * this.categoryTables, recording <code>newPosition</code> as the document number
     * for every word. Values accumulate as int[] (single doc) upgraded to IntList.
     */
    private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) {
        char[][] categoryNames = categoryToWords.keyTable;
        Object[] wordSets = categoryToWords.valueTable;
        for (int i = 0, l = categoryNames.length; i < l; i++) {
            char[] categoryName = categoryNames[i];
            if (categoryName != null) {
                SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
                HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
                if (wordsToDocs == null)
                    this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize));

                char[][] words = wordSet.words;
                for (int j = 0, m = words.length; j < m; j++) {
                    char[] word = words[j];
                    if (word != null) {
                        Object o = wordsToDocs.get(word);
                        if (o == null) {
                            wordsToDocs.putUnsafely(word, new int[] {newPosition});
                        } else if (o instanceof IntList) {
                            ((IntList) o).add(newPosition);
                        } else {
                            // upgrade the plain int[] to a growable list
                            IntList list = new IntList((int[]) o);
                            list.add(newPosition);
                            wordsToDocs.put(word, list);
                        }
                    }
                }
            }
        }
    }
    /**
     * Opens or creates the index file. When reusing an existing file, validates the
     * signature and reads the header info; otherwise deletes any stale file and
     * writes a fresh empty index (signature + -1 header offset).
     *
     * @param reuseExistingFile true to read an existing index, false to recreate it
     * @throws IOException on signature mismatch, unreadable/undeletable/uncreatable file
     */
    void initialize(boolean reuseExistingFile) throws IOException {
        if (this.indexLocation.exists()) {
            if (reuseExistingFile) {
                InputStream stream = this.indexLocation.getInputStream();
                if (stream == null) {
                    throw new IOException("Failed to use the index file"); //$NON-NLS-1$
                }
                this.streamBuffer = new byte[BUFFER_READ_SIZE];
                this.bufferIndex = 0;
                // 128 bytes is enough for the signature + header offset
                this.bufferEnd = stream.read(this.streamBuffer, 0, 128);
                try {
                    char[] signature = readStreamChars(stream);
                    if (!CharOperation.equals(signature, SIGNATURE_CHARS)) {
                        throw new IOException(Messages.exception_wrongFormat);
                    }
                    this.headerInfoOffset = readStreamInt(stream);
                    if (this.headerInfoOffset > 0) { // file is empty if its not set
                        stream.skip(this.headerInfoOffset - this.bufferEnd); // assume that the header info offset is over current buffer end
                        this.bufferIndex = 0;
                        this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
                        readHeaderInfo(stream);
                    }
                } finally {
                    stream.close();
                    this.indexLocation.close();
                }
                return;
            }
            if (!this.indexLocation.delete()) {
                if (DEBUG)
                    System.out.println("initialize - Failed to delete index " + this.indexLocation); //$NON-NLS-1$
                throw new IOException("Failed to delete index " + this.indexLocation); //$NON-NLS-1$
            }
        }
        if (this.indexLocation.createNewFile()) {
            FileOutputStream stream = new FileOutputStream(this.indexLocation.getIndexFile(), false);
            try {
                this.streamBuffer = new byte[BUFFER_READ_SIZE];
                this.bufferIndex = 0;
                writeStreamChars(stream, SIGNATURE_CHARS);
                writeStreamInt(stream, -1); // file is empty
                // write the buffer to the stream
                if (this.bufferIndex > 0) {
                    stream.write(this.streamBuffer, 0, this.bufferIndex);
                    this.bufferIndex = 0;
                }
            } finally {
                stream.close();
            }
        } else {
            if (DEBUG)
                System.out.println("initialize - Failed to create new index " + this.indexLocation); //$NON-NLS-1$
            throw new IOException("Failed to create new index " + this.indexLocation); //$NON-NLS-1$
        }
    }
    /**
     * Prepares this (new) index to receive the merge of <code>diskIndex</code>:
     * recreates the temp file and sizes the category tables from the old index.
     */
    private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException {
        if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
            if (DEBUG)
                System.out.println("initializeFrom - Failed to delete temp index " + this.indexLocation); //$NON-NLS-1$
        } else if (!newIndexFile.createNewFile()) {
            if (DEBUG)
                System.out.println("initializeFrom - Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
            throw new IOException("Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
        }
        int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize;
        this.categoryOffsets = new HashtableOfIntValues(size);
        this.categoryEnds = new HashtableOfIntValues(size);
        this.categoryTables = new HashtableOfObject(size);
        this.separator = diskIndex.separator;
    }
    /**
     * Writes out every category table: the union of categories already present in
     * this.categoryTables (from copyQueryResults()) and those on disk in
     * <code>onDisk</code>, remapped through <code>positions</code>.
     */
    private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
        // at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults()
        char[][] oldNames = onDisk.categoryOffsets.keyTable;
        for (int i = 0, l = oldNames.length; i < l; i++) {
            char[] oldName = oldNames[i];
            if (oldName != null && !this.categoryTables.containsKey(oldName))
                this.categoryTables.put(oldName, null);
        }
        char[][] categoryNames = this.categoryTables.keyTable;
        for (int i = 0, l = categoryNames.length; i < l; i++)
            if (categoryNames[i] != null)
                mergeCategory(categoryNames[i], onDisk, positions, stream);
        this.categoryTables = null;
    }
    /**
     * Merges one category: maps each old document number through
     * <code>positions</code> (dropping DELETED/RE_INDEXED references), combines with
     * any in-memory additions for the same word, then writes the table via
     * writeCategoryTable (defined elsewhere in this file).
     */
    private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
        HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
        if (wordsToDocs == null)
            wordsToDocs = new HashtableOfObject(3);

        HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true);
        if (oldWordsToDocs != null) {
            char[][] oldWords = oldWordsToDocs.keyTable;
            Object[] oldArrayOffsets = oldWordsToDocs.valueTable;
            nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
                char[] oldWord = oldWords[i];
                if (oldWord != null) {
                    int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
                    int length = oldDocNumbers.length;
                    int[] mappedNumbers = new int[length];
                    int count = 0;
                    for (int j = 0; j < length; j++) {
                        int pos = positions[oldDocNumbers[j]];
                        if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re_indexed
                            mappedNumbers[count++] = pos;
                    }
                    if (count < length) {
                        if (count == 0) continue nextWord; // skip words which no longer have any references
                        System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count);
                    }
                    Object o = wordsToDocs.get(oldWord);
                    if (o == null) {
                        wordsToDocs.putUnsafely(oldWord, mappedNumbers);
                    } else {
                        IntList list = null;
                        if (o instanceof IntList) {
                            list = (IntList) o;
                        } else {
                            list = new IntList((int[]) o);
                            wordsToDocs.put(oldWord, list);
                        }
                        for (int j = 0; j < count; j++)
                            list.add(mappedNumbers[j]);
                    }
                }
            }
            onDisk.categoryTables.put(categoryName, null); // flush cached table
        }
        writeCategoryTable(categoryName, wordsToDocs, stream);
    }
    /**
     * Merges this index with the in-memory index and returns the resulting DiskIndex.
     * Writes everything to a ".tmp" sibling first, then swaps it in by delete+rename;
     * if the rename fails the returned index keeps the tmp location (retried on the
     * next merge). Caller must hold the write lock.
     *
     * @throws IOException if this is a read-only pre-built index, or on any file error
     */
    DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
        // assume write lock is held
        // compute & write out new docNames
        if (this.indexLocation == null) {
            throw new IOException("Pre-built index file not writeable"); //$NON-NLS-1$
        }
        String[] docNames = readAllDocumentNames();
        int previousLength = docNames.length;
        int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
        SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
        docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex);
        if (docNames.length == 0) {
            if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved

            // index is now empty since all the saved documents were removed
            DiskIndex newDiskIndex = new DiskIndex(this.indexLocation);
            newDiskIndex.initialize(false);
            return newDiskIndex;
        }

        boolean usingTmp = false;
        File oldIndexFile = this.indexLocation.getIndexFile();
        String indexFilePath = oldIndexFile.getPath();
        if (indexFilePath.endsWith(TMP_EXT)) {
            // the tmp file could not be renamed last time
            indexFilePath = indexFilePath.substring(0, indexFilePath.length()-TMP_EXT.length());
            usingTmp = true;
        } else {
            indexFilePath += TMP_EXT;
        }
        DiskIndex newDiskIndex = new DiskIndex(new FileIndexLocation(new File(indexFilePath)));
        File newIndexFile = newDiskIndex.indexLocation.getIndexFile();
        try {
            newDiskIndex.initializeFrom(this, newIndexFile);
            FileOutputStream stream = new FileOutputStream(newIndexFile, false);
            int offsetToHeader = -1;
            try {
                newDiskIndex.writeAllDocumentNames(docNames, stream);
                docNames = null; // free up the space

                // add each new/changed doc to empty category tables using its new position #
                if (indexedDocuments.elementSize > 0) {
                    Object[] names = indexedDocuments.keyTable;
                    Object[] integerPositions = indexedDocuments.valueTable;
                    for (int i = 0, l = names.length; i < l; i++)
                        if (names[i] != null)
                            newDiskIndex.copyQueryResults(
                                (HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer) integerPositions[i]).intValue());
                }
                indexedDocuments = null; // free up the space

                // merge each category table with the new ones & write them out
                if (previousLength == 0)
                    newDiskIndex.writeCategories(stream);
                else
                    newDiskIndex.mergeCategories(this, positions, stream);
                offsetToHeader = newDiskIndex.streamEnd;
                newDiskIndex.writeHeaderInfo(stream);
                positions = null; // free up the space
            } finally {
                stream.close();
                this.streamBuffer = null;
            }
            newDiskIndex.writeOffsetToHeader(offsetToHeader);

            // rename file by deleting previous index file & renaming temp one
            if (oldIndexFile.exists() && !oldIndexFile.delete()) {
                if (DEBUG)
                    System.out.println("mergeWith - Failed to delete " + this.indexLocation); //$NON-NLS-1$
                throw new IOException("Failed to delete index file " + this.indexLocation); //$NON-NLS-1$
            }
            if (!usingTmp && !newIndexFile.renameTo(oldIndexFile)) {
                // try again after waiting for two milli secs
                try {
                    Thread.sleep(2);
                } catch (InterruptedException e) {
                    //ignore
                }
                if (!newIndexFile.renameTo(oldIndexFile)) {
                    if (DEBUG)
                        System.out.println("mergeWith - Failed to rename " + this.indexLocation); //$NON-NLS-1$
                    usingTmp = true;
                }
            }
        } catch (IOException e) {
            if (newIndexFile.exists() && !newIndexFile.delete())
                if (DEBUG)
                    System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexLocation); //$NON-NLS-1$
            throw e;
        }
        if (!usingTmp) // rename done, use the new file
            newDiskIndex.indexLocation = this.indexLocation;
        return newDiskIndex;
    }
    /**
     * Reads every document name from the file (without caching them in
     * {@link #cachedChunks}). Returns an empty array for an empty index.
     */
    private synchronized String[] readAllDocumentNames() throws IOException {
        if (this.numberOfChunks <= 0)
            return CharOperation.NO_STRINGS;

        InputStream stream = this.indexLocation.getInputStream();
        try {
            int offset = this.chunkOffsets[0];
            stream.skip(offset);
            this.streamBuffer = new byte[BUFFER_READ_SIZE];
            this.bufferIndex = 0;
            this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
            int lastIndex = this.numberOfChunks - 1;
            String[] docNames = new String[lastIndex * CHUNK_SIZE + this.sizeOfLastChunk];
            for (int i = 0; i < this.numberOfChunks; i++)
                readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : this.sizeOfLastChunk);
            return docNames;
        } finally {
            stream.close();
            this.indexLocation.close();
            this.streamBuffer = null;
        }
    }
    /**
     * Reads (or returns the cached) word table for a category. Values are int[] of
     * document numbers for small entries, or an Integer file offset for large ones;
     * when <code>readDocNumbers</code> is true those large arrays are resolved too
     * and the table is cached. Returns null if the category is unknown.
     */
    private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException {
        // result will be null if categoryName is unknown
        int offset = this.categoryOffsets.get(categoryName);
        if (offset == HashtableOfIntValues.NO_VALUE) {
            return null;
        }

        if (this.categoryTables == null) {
            this.categoryTables = new HashtableOfObject(3);
        } else {
            HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName);
            if (cachedTable != null) {
                if (readDocNumbers) { // must cache remaining document number arrays
                    Object[] arrayOffsets = cachedTable.valueTable;
                    for (int i = 0, l = arrayOffsets.length; i < l; i++)
                        if (arrayOffsets[i] instanceof Integer)
                            arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
                }
                return cachedTable;
            }
        }

        InputStream stream = this.indexLocation.getInputStream();
        HashtableOfObject categoryTable = null;
        char[][] matchingWords = null;
        int count = 0;
        int firstOffset = -1;
        this.streamBuffer = new byte[BUFFER_READ_SIZE];
        try {
            stream.skip(offset);
            this.bufferIndex = 0;
            this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
            int size = readStreamInt(stream);
            try {
                if (size < 0) { // DEBUG
                    System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
                    System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
                    System.err.println("offset = "+offset); //$NON-NLS-1$
                    System.err.println("size = "+size); //$NON-NLS-1$
                    System.err.println("-------------------- END --------------------"); //$NON-NLS-1$
                }
                categoryTable = new HashtableOfObject(size);
            } catch (OutOfMemoryError oom) {
                // DEBUG
                oom.printStackTrace();
                System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
                System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
                System.err.println("offset = "+offset); //$NON-NLS-1$
                System.err.println("size = "+size); //$NON-NLS-1$
                System.err.println("-------------------- END --------------------"); //$NON-NLS-1$
                throw oom;
            }
            int largeArraySize = 256;
            for (int i = 0; i < size; i++) {
                char[] word = readStreamChars(stream);
                int arrayOffset = readStreamInt(stream);
                // if arrayOffset is:
                //   <= 0 then the array size == 1 with the value -> -arrayOffset
                //   > 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately
                //   256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
                if (arrayOffset <= 0) {
                    categoryTable.putUnsafely(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber
                } else if (arrayOffset < largeArraySize) {
                    categoryTable.putUnsafely(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size
                } else {
                    arrayOffset = readStreamInt(stream); // read actual offset
                    if (readDocNumbers) {
                        if (matchingWords == null)
                            matchingWords = new char[size][];
                        if (count == 0)
                            firstOffset = arrayOffset;
                        matchingWords[count++] = word;
                    }
                    categoryTable.putUnsafely(word, Integer.valueOf(arrayOffset)); // offset to array in the file
                }
            }
            this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable);
            // cache the table as long as its not too big
            // in practice, some tables can be greater than 500K when they contain more than 10K elements
            this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null;
        } catch (IOException ioe) {
            this.streamBuffer = null;
            throw ioe;
        } finally {
            stream.close();
            this.indexLocation.close();
        }

        if (matchingWords != null && count > 0) {
            // second pass: resolve the large arrays, which were written contiguously before the table
            stream = this.indexLocation.getInputStream();
            try {
                stream.skip(firstOffset);
                this.bufferIndex = 0;
                this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
                for (int i = 0; i < count; i++) { // each array follows the previous one
                    categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream)));
                }
            } catch (IOException ioe) {
                this.streamBuffer = null;
                throw ioe;
            } finally {
                stream.close();
                this.indexLocation.close();
            }
        }
        this.streamBuffer = null;
        return categoryTable;
    }
    /**
     * Decodes one chunk of delta-encoded document names into docNames[index..].
     * Each name after the first is stored as (shared-prefix length, shared-suffix
     * length, middle chars) relative to the previous name. A null stream means the
     * whole chunk is already in {@link #streamBuffer}.
     */
    private void readChunk(String[] docNames, InputStream stream, int index, int size) throws IOException {
        String current = new String(readStreamChars(stream));
        docNames[index++] = current;
        for (int i = 1; i < size; i++) {
            if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
                readStreamBuffer(stream);
            int start = this.streamBuffer[this.bufferIndex++] & 0xFF;
            int end = this.streamBuffer[this.bufferIndex++] & 0xFF;
            String next = new String(readStreamChars(stream));
            if (start > 0) {
                if (end > 0) {
                    int length = current.length();
                    next = current.substring(0, start) + next + current.substring(length - end, length);
                } else {
                    next = current.substring(0, start) + next;
                }
            } else if (end > 0) {
                int length = current.length();
                next = next + current.substring(length - end, length);
            }
            docNames[index++] = next;
            current = next;
        }
    }
    /**
     * Returns the document name for a document number, loading (and caching) only
     * the chunk that contains it.
     *
     * @throws IllegalArgumentException if the chunk offsets are inconsistent
     * @throws IOException if the chunk cannot be fully read
     */
    synchronized String readDocumentName(int docNumber) throws IOException {
        if (this.cachedChunks == null)
            this.cachedChunks = new String[this.numberOfChunks][];

        int chunkNumber = docNumber / CHUNK_SIZE;
        String[] chunk = this.cachedChunks[chunkNumber];
        if (chunk == null) {
            boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
            int start = this.chunkOffsets[chunkNumber];
            int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start;
            if (numberOfBytes < 0)
                throw new IllegalArgumentException();
            // read the entire chunk into the buffer, then decode without a stream
            this.streamBuffer = new byte[numberOfBytes];
            this.bufferIndex = 0;
            InputStream file = this.indexLocation.getInputStream();
            try {
                file.skip(start);
                if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes)
                    throw new IOException();
            } catch (IOException ioe) {
                this.streamBuffer = null;
                throw ioe;
            } finally {
                file.close();
                this.indexLocation.close();
            }
            int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
            chunk = new String[numberOfNames];
            try {
                readChunk(chunk, null, 0, numberOfNames);
            } catch (IOException ioe) {
                this.streamBuffer = null;
                throw ioe;
            }
            this.cachedChunks[chunkNumber] = chunk;
        }
        this.streamBuffer = null;
        return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
    }
    /**
     * Resolves a category-table value to its int[] of document numbers, reading it
     * from the file when the value is still an Integer offset.
     */
    synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException {
        // arrayOffset is either a cached array of docNumbers or an Integer offset in the file
        if (arrayOffset instanceof int[])
            return (int[]) arrayOffset;

        InputStream stream = this.indexLocation.getInputStream();
        try {
            int offset = ((Integer) arrayOffset).intValue();
            stream.skip(offset);
            this.streamBuffer = new byte[BUFFER_READ_SIZE];
            this.bufferIndex = 0;
            this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
            return readStreamDocumentArray(stream, readStreamInt(stream));
        } finally {
            stream.close();
            this.indexLocation.close();
            this.streamBuffer = null;
        }
    }
    /**
     * Reads the header info written by writeHeaderInfo() (defined elsewhere in this
     * file): chunk layout, then category name/offset pairs. Sanity-checks counts
     * against the file length to detect corruption.
     */
    private void readHeaderInfo(InputStream stream) throws IOException {
        // must be same order as writeHeaderInfo()
        this.numberOfChunks = readStreamInt(stream);
        this.sizeOfLastChunk = this.streamBuffer[this.bufferIndex++] & 0xFF;
        this.documentReferenceSize = this.streamBuffer[this.bufferIndex++] & 0xFF;
        this.separator = (char) (this.streamBuffer[this.bufferIndex++] & 0xFF);
        long length = this.indexLocation.length();
        if (length != -1 && this.numberOfChunks > length) {
            // not an accurate check, but good enough https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
            if (DEBUG)
                System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
            throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
        }
        this.chunkOffsets = new int[this.numberOfChunks];
        for (int i = 0; i < this.numberOfChunks; i++)
            this.chunkOffsets[i] = readStreamInt(stream);

        this.startOfCategoryTables = readStreamInt(stream);

        int size = readStreamInt(stream);
        this.categoryOffsets = new HashtableOfIntValues(size);
        this.categoryEnds = new HashtableOfIntValues(size);
        if (length != -1 && size > length) {
            // not an accurate check, but good enough https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
            if (DEBUG)
                System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
            throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
        }
        char[] previousCategory = null;
        int offset = -1;
        for (int i = 0; i < size; i++) {
            char[] categoryName = INTERNED_CATEGORY_NAMES.get(readStreamChars(stream));
            offset = readStreamInt(stream);
            this.categoryOffsets.put(categoryName, offset); // cache offset to category table
            // each table ends where the next one starts; the last ends at the header
            if (previousCategory != null) {
                this.categoryEnds.put(previousCategory, offset); // cache end of the category table
            }
            previousCategory = categoryName;
        }
        if (previousCategory != null) {
            this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table
        }
        this.categoryTables = new HashtableOfObject(3);
    }
    /** Marks the start of a query; keeps caches alive until the matching stopQuery(). */
    synchronized void startQuery() {
        this.cacheUserCount++;
    }
    /**
     * Marks the end of a query. When the last user stops, drops the cached chunks
     * and all cached category tables except the (small) one remembered in
     * {@link #cachedCategoryName}.
     */
    synchronized void stopQuery() {
        if (--this.cacheUserCount < 0) {
            // clear cached items
            this.cacheUserCount = -1;
            this.cachedChunks = null;
            if (this.categoryTables != null) {
                if (this.cachedCategoryName == null) {
                    this.categoryTables = null;
                } else if (this.categoryTables.elementSize > 1) {
                    HashtableOfObject newTables = new HashtableOfObject(3);
                    newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName));
                    this.categoryTables = newTables;
                }
            }
        }
    }
    /**
     * Refills {@link #streamBuffer} from the stream, first shifting any unread bytes
     * to the front. Returns without reading when the previous fill already hit the
     * end of the stream and nothing more is available.
     */
    private void readStreamBuffer(InputStream stream) throws IOException {
        // if we're about to read a known amount at the end of the existing buffer, but it does not completely fit
        // so we need to shift the remaining bytes to be read, and fill the buffer from the stream
        if (this.bufferEnd < this.streamBuffer.length) {
            if (stream.available() == 0)
                return; // we're at the end of the stream - nothing left to read
        }
        int bytesInBuffer = this.bufferEnd - this.bufferIndex;
        if (bytesInBuffer > 0)
            System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer);
        this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex);
        this.bufferIndex = 0;
    }
Reads in a string from the specified data input stream. The string has been encoded using a modified UTF-8 format.

The first two bytes are read as an unsigned short. This value gives the number of following bytes that are in the encoded string, not the length of the resulting string. The following bytes are then interpreted as bytes encoding characters in the UTF-8 format and are converted into characters.

This method blocks until all the bytes are read, the end of the stream is detected, or an exception is thrown.

Params:
  • stream – a data input stream.
Throws:
  • EOFException – if the end of the data input is reached while reading it.
  • IOException – if an I/O error occurs while reading data input.
  • UTFDataFormatException – if the bytes do not represent a valid UTF-8 encoding of a Unicode string.
Returns: UTF decoded string as a char array
/**
 * Reads in a string from the specified data input stream. The
 * string has been encoded using a modified UTF-8 format.
 * <p>
 * The first two bytes are read as an unsigned short.
 * This value gives the number of following bytes that are in the encoded string,
 * not the length of the resulting string. The following bytes are then
 * interpreted as bytes encoding characters in the UTF-8 format
 * and are converted into characters.
 * <p>
 * This method blocks until all the bytes are read, the end of the
 * stream is detected, or an exception is thrown.
 *
 * @param stream a data input stream.
 * @return UTF decoded string as a char array
 * @exception EOFException if the end of the data input is reached while reading it.
 * @exception IOException if an I/O error occurs while reading data input.
 * @exception UTFDataFormatException if the bytes do not represent a
 *       valid UTF-8 encoding of a Unicode string.
 */
private char[] readStreamChars(InputStream stream) throws IOException {
	// read chars array length
	// (a null stream means all remaining data is already in streamBuffer)
	if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
		readStreamBuffer(stream);
	int length = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
	length += this.streamBuffer[this.bufferIndex++] & 0xFF;

	// fill the chars from bytes buffer
	char[] word = new char[length];
	int i = 0;
	while (i < length) {
		// how many characters can be decoded without refilling the buffer?
		// worst case is 3 bytes per character, hence the division by 3
		int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3);
		// all the characters must already be in the buffer if we're at the end of the stream
		if (charsInBuffer > length || stream == null || (this.bufferEnd != this.streamBuffer.length && stream.available() == 0))
			charsInBuffer = length;
		while (i < charsInBuffer) {
			byte b = this.streamBuffer[this.bufferIndex++];
			// dispatch on the high nibble to determine the encoded character's byte count
			switch (b & 0xF0) {
				case 0x00 :
				case 0x10 :
				case 0x20 :
				case 0x30 :
				case 0x40 :
				case 0x50 :
				case 0x60 :
				case 0x70 :
					// single byte: 0xxxxxxx
					word[i++]= (char) b;
					break;
				case 0xC0 :
				case 0xD0 :
					// two bytes: 110xxxxx 10xxxxxx
					char next = (char) this.streamBuffer[this.bufferIndex++];
					if ((next & 0xC0) != 0x80) {
						throw new UTFDataFormatException();
					}
					char ch = (char) ((b & 0x1F) << 6);
					ch |= next & 0x3F;
					word[i++] = ch;
					break;
				case 0xE0 :
					// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
					char first = (char) this.streamBuffer[this.bufferIndex++];
					char second = (char) this.streamBuffer[this.bufferIndex++];
					if ((first & second & 0xC0) != 0x80) {
						throw new UTFDataFormatException();
					}
					ch = (char) ((b & 0x0F) << 12);
					ch |= ((first& 0x3F) << 6);
					ch |= second & 0x3F;
					word[i++] = ch;
					break;
				default:
					// 0x80-0xBF (continuation) or 0xF0+ (4-byte) are invalid here
					throw new UTFDataFormatException();
			}
		}
		if (i < length && stream != null)
			readStreamBuffer(stream);
	}
	return word;
}
/**
 * Reads an array of {@code arraySize} document numbers from the stream,
 * where each number was written using {@link #documentReferenceSize}
 * bytes (1, 2 or 4 - see the matching {@code writeDocumentNumbers()}).
 *
 * @param stream the input stream, or null if all data is already buffered
 * @param arraySize number of document references to read
 * @return the decoded document numbers
 * @exception IOException if an I/O error occurs while reading
 */
private int[] readStreamDocumentArray(InputStream stream, int arraySize) throws IOException {
	int[] indexes = new int[arraySize];
	if (arraySize == 0) return indexes;

	int i = 0;
	switch (this.documentReferenceSize) {
		case 1 :
			// each document number fits in one unsigned byte
			while (i < arraySize) {
				// how many bytes without refilling the buffer?
				int bytesInBuffer = i + this.bufferEnd - this.bufferIndex;
				if (bytesInBuffer > arraySize)
					bytesInBuffer = arraySize;
				while (i < bytesInBuffer) {
					indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF;
				}
				if (i < arraySize && stream != null)
					readStreamBuffer(stream);
			}
			break;
		case 2 :
			// each document number fits in one unsigned short (big-endian)
			while (i < arraySize) {
				// how many shorts without refilling the buffer?
				int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2);
				if (shortsInBuffer > arraySize)
					shortsInBuffer = arraySize;
				while (i < shortsInBuffer) {
					int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
					indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
				}
				if (i < arraySize && stream != null)
					readStreamBuffer(stream);
			}
			break;
		default :
			// 4 bytes per document number
			while (i < arraySize) {
				indexes[i++] = readStreamInt(stream);
			}
			break;
	}
	return indexes;
}
/**
 * Reads a big-endian 4-byte int from the buffered stream, refilling the
 * buffer first if fewer than 4 bytes remain.
 *
 * @param stream the input stream to read from
 * @return the decoded int value
 * @exception IOException if an I/O error occurs while reading
 */
private int readStreamInt(InputStream stream) throws IOException {
	if (this.bufferIndex + 4 >= this.bufferEnd) {
		readStreamBuffer(stream);
	}
	int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 24;
	val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 16;
	val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
	return val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
}
/**
 * Writes the signature, a header-offset placeholder and all document names to the
 * index file, compressed in chunks of {@link #CHUNK_SIZE} names. Within a chunk,
 * only the first name is written in full; each subsequent name is delta-encoded
 * against its predecessor as (shared-prefix length, shared-suffix length, middle chars).
 * Also computes {@link #numberOfChunks}, {@link #sizeOfLastChunk},
 * {@link #documentReferenceSize}, {@link #chunkOffsets} and
 * {@link #startOfCategoryTables} as side effects.
 *
 * @param sortedDocNames the document names, pre-sorted (must be non-empty)
 * @param stream the index file output stream
 * @exception IOException if an I/O error occurs while writing
 */
private void writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream) throws IOException {
	if (sortedDocNames.length == 0)
		throw new IllegalArgumentException();

	// assume the file was just created by initializeFrom()
	this.streamBuffer = new byte[BUFFER_WRITE_SIZE];
	this.bufferIndex = 0;
	this.streamEnd = 0;

	// in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names
	writeStreamChars(stream, SIGNATURE_CHARS);
	this.headerInfoOffset = this.streamEnd;
	writeStreamInt(stream, -1); // will overwrite with correct value later

	int size = sortedDocNames.length;
	this.numberOfChunks = (size / CHUNK_SIZE) + 1;
	this.sizeOfLastChunk = size % CHUNK_SIZE;
	if (this.sizeOfLastChunk == 0) {
		// size is an exact multiple of CHUNK_SIZE - the "extra" chunk is not needed
		this.numberOfChunks--;
		this.sizeOfLastChunk = CHUNK_SIZE;
	}
	this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference

	this.chunkOffsets = new int[this.numberOfChunks];
	int lastIndex = this.numberOfChunks - 1;
	for (int i = 0; i < this.numberOfChunks; i++) {
		this.chunkOffsets[i] = this.streamEnd;

		int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
		int chunkIndex = i * CHUNK_SIZE;
		String current = sortedDocNames[chunkIndex];
		// first name of the chunk is stored in full; the rest are deltas against the previous name
		writeStreamChars(stream, current.toCharArray());
		for (int j = 1; j < chunkSize; j++) {
			String next = sortedDocNames[chunkIndex + j];
			int len1 = current.length();
			int len2 = next.length();
			int max = len1 < len2 ? len1 : len2;
			int start = 0; // number of identical characters at the beginning (also the index of first character that is different)
			while (current.charAt(start) == next.charAt(start)) {
				start++;
				if (max == start) break; // current is 'abba', next is 'abbab'
			}
			if (start > 255) start = 255; // prefix length is stored in a single byte

			int end = 0; // number of identical characters at the end
			while (current.charAt(--len1) == next.charAt(--len2)) {
				end++;
				if (len2 == start) break; // current is 'abbba', next is 'abba'
				if (len1 == 0) break; // current is 'xabc', next is 'xyabc'
			}
			if (end > 255) end = 255; // suffix length is stored in a single byte

			if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			this.streamBuffer[this.bufferIndex++] = (byte) start;
			this.streamBuffer[this.bufferIndex++] = (byte) end;
			this.streamEnd += 2;

			// write only the middle part that differs from the previous name
			int last = next.length() - end;
			writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR));
			current = next;
		}
	}
	this.startOfCategoryTables = this.streamEnd + 1;
}
/**
 * Writes every cached category table to the stream, then drops the cache
 * (each table's file offset is recorded in {@link #categoryOffsets}).
 *
 * @param stream the index file output stream
 * @exception IOException if an I/O error occurs while writing
 */
private void writeCategories(FileOutputStream stream) throws IOException {
	char[][] categoryNames = this.categoryTables.keyTable;
	Object[] tables = this.categoryTables.valueTable;
	for (int i = 0, l = categoryNames.length; i < l; i++)
		if (categoryNames[i] != null)
			writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream);
	this.categoryTables = null;
}
/**
 * Writes one category's word -&gt; document-numbers table to the stream and
 * records its offset in {@link #categoryOffsets}.
 *
 * @param categoryName the category being written
 * @param wordsToDocs table of word -&gt; int[] (or IntList) of document numbers
 * @param stream the index file output stream
 * @exception IOException if an I/O error occurs while writing
 */
private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException {
	// the format of a category table is as follows:
	// any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered)
	// then the number of word->int[] pairs in the table is written
	// for each word -> int[] pair, the word is written followed by:
	//		an int <= 0 if the array size == 1
	//		an int > 1 & < 256 for the size of the array if its > 1 & < 256, the document array follows immediately
	//		256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)

	int largeArraySize = 256;
	// first pass: flush large arrays ahead of the table, replacing each with its file offset
	Object[] values = wordsToDocs.valueTable;
	for (int i = 0, l = values.length; i < l; i++) {
		Object o = values[i];
		if (o != null) {
			if (o instanceof IntList)
				o = values[i] = ((IntList) values[i]).asArray();
			int[] documentNumbers = (int[]) o;
			if (documentNumbers.length >= largeArraySize) {
				values[i] = Integer.valueOf(this.streamEnd);
				writeDocumentNumbers(documentNumbers, stream);
			}
		}
	}

	this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table
	this.categoryTables.put(categoryName, null); // flush cached table
	writeStreamInt(stream, wordsToDocs.elementSize);
	// second pass: write the word -> documents pairs
	char[][] words = wordsToDocs.keyTable;
	for (int i = 0, l = words.length; i < l; i++) {
		Object o = values[i];
		if (o != null) {
			writeStreamChars(stream, words[i]);
			if (o instanceof int[]) {
				int[] documentNumbers = (int[]) o;
				if (documentNumbers.length == 1)
					writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
				else
					writeDocumentNumbers(documentNumbers, stream);
			} else {
				// the first pass replaced the large array with an Integer offset
				writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows
				writeStreamInt(stream, ((Integer) o).intValue()); // offset in the file of the array of document numbers
			}
		}
	}
}
/**
 * Sorts and writes an array of document numbers: the length as an int,
 * then each number using {@link #documentReferenceSize} bytes (1, 2 or 4).
 * Flushes {@link #streamBuffer} in full chunks when the array is larger
 * than the remaining buffer space.
 *
 * @param documentNumbers the document numbers to write (sorted in place)
 * @param stream the index file output stream
 * @exception IOException if an I/O error occurs while writing
 */
private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException {
	// must store length as a positive int to detect in-lined array of 1 element
	int length = documentNumbers.length;
	writeStreamInt(stream, length);
	Util.sort(documentNumbers);
	int start = 0;
	switch (this.documentReferenceSize) {
		case 1 :
			while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) {
				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
				int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex;
				for (int i=0; i < bytesLeft; i++) {
					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
				}
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			while (start < length) {
				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
			}
			this.streamEnd += length;
			break;
		case 2 :
			while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) {
				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
				int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2;
				for (int i=0; i < shortsLeft; i++) {
					this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
				}
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			while (start < length) {
				this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
			}
			this.streamEnd += length * 2;
			break;
		default :
			// 4 bytes per document number
			while (start < length) {
				writeStreamInt(stream, documentNumbers[start++]);
			}
			break;
	}
}
/**
 * Writes the header info block: number of chunks, size of the last chunk,
 * the document reference size, the separator, the chunk offsets, the start
 * of the category tables and the category name -&gt; offset pairs. Ends by
 * flushing any bytes still pending in {@link #streamBuffer}.
 *
 * @param stream the index file output stream
 * @exception IOException if an I/O error occurs while writing
 */
private void writeHeaderInfo(FileOutputStream stream) throws IOException {
	writeStreamInt(stream, this.numberOfChunks);

	// the next 3 values each fit in a single byte
	if ((this.bufferIndex + 3) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk;
	this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize;
	this.streamBuffer[this.bufferIndex++] = (byte) this.separator;
	this.streamEnd += 3;

	// append the file with chunk offsets
	for (int i = 0; i < this.numberOfChunks; i++) {
		writeStreamInt(stream, this.chunkOffsets[i]);
	}

	writeStreamInt(stream, this.startOfCategoryTables);

	// append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table
	writeStreamInt(stream, this.categoryOffsets.elementSize);
	char[][] categoryNames = this.categoryOffsets.keyTable;
	int[] offsets = this.categoryOffsets.valueTable;
	for (int i = 0, l = categoryNames.length; i < l; i++) {
		if (categoryNames[i] != null) {
			writeStreamChars(stream, categoryNames[i]);
			writeStreamInt(stream, offsets[i]);
		}
	}
	// ensure buffer is written to the stream
	if (this.bufferIndex > 0) {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
}
/**
 * Overwrites the placeholder written by {@code writeAllDocumentNames()} with
 * the real offset of the header info, by seeking directly in the index file.
 * A non-positive offset is silently ignored.
 *
 * @param offsetToHeader file offset of the header info block
 * @exception IOException if an I/O error occurs while writing
 */
private void writeOffsetToHeader(int offsetToHeader) throws IOException {
	if (offsetToHeader > 0) {
		RandomAccessFile file = new RandomAccessFile(this.indexLocation.getIndexFile(), "rw"); //$NON-NLS-1$
		try {
			file.seek(this.headerInfoOffset); // offset to position in header
			file.writeInt(offsetToHeader);
			this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
		} finally {
			file.close();
		}
	}
}
Writes a string to the given output stream using UTF-8 encoding in a machine-independent manner.

First, two bytes of the array give the number of bytes to follow. This value is the number of bytes actually written out, not the length of the string. Following the length, each character of the string is put in the bytes array, in sequence, using the UTF-8 encoding for the character.

Then the entire byte array is written to the output stream using OutputStream.write(byte[], int, int) method.

Params:
  • array – char array to be written.
Throws:
  • IOException – if an I/O error occurs while writing the bytes array to the stream.
/**
 * Writes a string to the given output stream using UTF-8
 * encoding in a machine-independent manner.
 * <p>
 * First, two bytes of the array are giving the number of bytes to
 * follow. This value is the number of bytes actually written out,
 * not the length of the string. Following the length, each character
 * of the string is put in the bytes array, in sequence, using the UTF-8
 * encoding for the character.
 * </p>
 * <p>
 * Then the entire byte array is written to the output stream
 * using {@link OutputStream#write(byte[], int, int)} method.
 * </p>
 *
 * @param array char array to be written.
 * @exception IOException if an I/O error occurs while writing
 *       the bytes array to the stream.
 */
private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException {
	if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	int length = array.length;
	this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes
	this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster
	this.streamEnd += 2;

	// we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all
	int totalBytesNeeded = length * 3; // worst case: 3 bytes per encoded character
	if (totalBytesNeeded <= BUFFER_WRITE_SIZE) {
		if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) {
			// flush the buffer now to make sure there is room for the array
			stream.write(this.streamBuffer, 0, this.bufferIndex);
			this.bufferIndex = 0;
		}
		writeStreamChars(stream, array, 0, length);
	} else {
		// array may exceed the buffer even when empty: encode it in slices,
		// flushing the buffer before each slice so the worst case always fits
		int charsPerWrite = BUFFER_WRITE_SIZE / 3;
		int start = 0;
		while (start < length) {
			stream.write(this.streamBuffer, 0, this.bufferIndex);
			this.bufferIndex = 0;
			int charsLeftToWrite = length - start;
			int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite);
			writeStreamChars(stream, array, start, end);
			start = end;
		}
	}
}
/**
 * Encodes {@code array[start..end)} into {@link #streamBuffer} using the
 * modified UTF-8 format (1, 2 or 3 bytes per character) and bumps
 * {@link #streamEnd} by the number of bytes produced. The caller must have
 * ensured that the buffer has room for (end - start) * 3 bytes.
 *
 * @param stream the index file output stream (unused here; kept for symmetry)
 * @param array the characters to encode
 * @param start index of the first character to encode (must be &lt; end)
 * @param end index past the last character to encode
 * @exception IOException if an I/O error occurs while writing
 */
private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException {
	// start can NOT be == end
	// must have checked that there is enough room for end - start * 3 bytes in the buffer

	int oldIndex = this.bufferIndex;
	while (start < end) {
		int ch = array[start++];
		if ((ch & 0x007F) == ch) {
			// single byte: 0xxxxxxx
			this.streamBuffer[this.bufferIndex++] = (byte) ch;
		} else if ((ch & 0x07FF) == ch) {
			// two bytes: 110xxxxx 10xxxxxx
			// first two bits are stored in first byte
			byte b = (byte) (ch >> 6);
			b &= 0x1F;
			b |= 0xC0;
			this.streamBuffer[this.bufferIndex++] = b;
			// last six bits are stored in second byte
			b = (byte) (ch & 0x3F);
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
		} else {
			// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
			// first four bits are stored in first byte
			byte b = (byte) (ch >> 12);
			b &= 0x0F;
			b |= 0xE0;
			this.streamBuffer[this.bufferIndex++] = b;
			// six following bits are stored in second byte
			b = (byte) (ch >> 6);
			b &= 0x3F;
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
			// last six bits are stored in third byte
			b = (byte) (ch & 0x3F);
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
		}
	}
	this.streamEnd += this.bufferIndex - oldIndex;
}
/**
 * Writes a big-endian 4-byte int into {@link #streamBuffer}, flushing the
 * buffer to the stream first if fewer than 4 bytes of space remain.
 *
 * @param stream the index file output stream
 * @param val the value to write
 * @exception IOException if an I/O error occurs while writing
 */
private void writeStreamInt(FileOutputStream stream, int val) throws IOException {
	if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24);
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16);
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8);
	this.streamBuffer[this.bufferIndex++] = (byte) val;
	this.streamEnd += 4;
}
}