/*
 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package sun.text.normalizer;

import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;

Trie implementation which stores data in char, 16 bits.
Author:synwee
See Also:
  • Trie
Since:release 2.1, Jan 01 2002
/** * Trie implementation which stores data in char, 16 bits. * @author synwee * @see com.ibm.icu.impl.Trie * @since release 2.1, Jan 01 2002 */
// note that i need to handle the block calculations later, since chartrie // in icu4c uses the same index array. public class CharTrie extends Trie { // public constructors ---------------------------------------------

Creates a new Trie with the settings for the trie data.

Unserialize the 32-bit-aligned input stream and use the data for the trie.

Params:
  • inputStream – file input stream to a ICU data file, containing the trie
  • dataManipulate – object which provides methods to parse the char data
Throws:
@draft2.1
/** * <p>Creates a new Trie with the settings for the trie data.</p> * <p>Unserialize the 32-bit-aligned input stream and use the data for the * trie.</p> * @param inputStream file input stream to a ICU data file, containing * the trie * @param dataManipulate object which provides methods to parse the char * data * @throws IOException thrown when data reading fails * @draft 2.1 */
public CharTrie(InputStream inputStream, DataManipulate dataManipulate) throws IOException { super(inputStream, dataManipulate); if (!isCharTrie()) { throw new IllegalArgumentException( "Data given does not belong to a char trie."); } m_friendAgent_ = new FriendAgent(); }
Make a dummy CharTrie. A dummy trie is an empty runtime trie, used when a real data trie cannot be loaded. The trie always returns the initialValue, or the leadUnitValue for lead surrogate code points. The Latin-1 part is always set up to be linear.
Params:
  • initialValue – the initial value that is set for all code points
  • leadUnitValue – the value for lead surrogate code _units_ that do not have associated supplementary data
  • dataManipulate – object which provides methods to parse the char data
/** * Make a dummy CharTrie. * A dummy trie is an empty runtime trie, used when a real data trie cannot * be loaded. * * The trie always returns the initialValue, * or the leadUnitValue for lead surrogate code points. * The Latin-1 part is always set up to be linear. * * @param initialValue the initial value that is set for all code points * @param leadUnitValue the value for lead surrogate code _units_ that do not * have associated supplementary data * @param dataManipulate object which provides methods to parse the char data */
public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); int dataLength, latin1Length, i, limit; char block; /* calculate the actual size of the dummy trie data */ /* max(Latin-1, block 0) */ dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH; if(leadUnitValue!=initialValue) { dataLength+=DATA_BLOCK_LENGTH; } m_data_=new char[dataLength]; m_dataLength_=dataLength; m_initialValue_=(char)initialValue; /* fill the index and data arrays */ /* indexes are preset to 0 (block 0) */ /* Latin-1 data */ for(i=0; i<latin1Length; ++i) { m_data_[i]=(char)initialValue; } if(leadUnitValue!=initialValue) { /* indexes for lead surrogate code units to the block after Latin-1 */ block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_); i=0xd800>>INDEX_STAGE_1_SHIFT_; limit=0xdc00>>INDEX_STAGE_1_SHIFT_; for(; i<limit; ++i) { m_index_[i]=block; } /* data for lead surrogate code units */ limit=latin1Length+DATA_BLOCK_LENGTH; for(i=latin1Length; i<limit; ++i) { m_data_[i]=(char)leadUnitValue; } } m_friendAgent_ = new FriendAgent(); }
Java friend implementation
/** * Java friend implementation */
public class FriendAgent {
Gives out the index array of the trie
Returns:index array of trie
/** * Gives out the index array of the trie * @return index array of trie */
public char[] getPrivateIndex() { return m_index_; }
Gives out the data array of the trie
Returns:data array of trie
/** * Gives out the data array of the trie * @return data array of trie */
public char[] getPrivateData() { return m_data_; }
Gives out the data offset in the trie
Returns:data offset in the trie
/** * Gives out the data offset in the trie * @return data offset in the trie */
public int getPrivateInitialValue() { return m_initialValue_; } } // public methods --------------------------------------------------
Java friend implementation To store the index and data array into the argument.
Params:
  • friend – java friend UCharacterProperty object to store the array
/** * Java friend implementation * To store the index and data array into the argument. * @param friend java friend UCharacterProperty object to store the array */
public void putIndexData(UCharacterProperty friend) { friend.setIndexData(m_friendAgent_); }
Gets the value associated with the codepoint. If no value is associated with the codepoint, a default value will be returned.
Params:
  • ch – codepoint
Returns:offset to data
@draft2.1
/** * Gets the value associated with the codepoint. * If no value is associated with the codepoint, a default value will be * returned. * @param ch codepoint * @return offset to data * @draft 2.1 */
public final char getCodePointValue(int ch) { int offset; // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return (offset >= 0) ? m_data_[offset] : m_initialValue_; }
Gets the value to the data which this lead surrogate character points to. Returned data may contain folding offset information for the next trailing surrogate character. This method does not guarantee correct results for trail surrogates.
Params:
  • ch – lead surrogate character
Returns:data value
@draft2.1
/** * Gets the value to the data which this lead surrogate character points * to. * Returned data may contain folding offset information for the next * trailing surrogate character. * This method does not guarantee correct results for trail surrogates. * @param ch lead surrogate character * @return data value * @draft 2.1 */
public final char getLeadValue(char ch) { return m_data_[getLeadOffset(ch)]; }
Get the value associated with a pair of surrogates.
Params:
  • lead – a lead surrogate
  • trail – a trail surrogate
@draft2.1
/** * Get the value associated with a pair of surrogates. * @param lead a lead surrogate * @param trail a trail surrogate * @draft 2.1 */
public final char getSurrogateValue(char lead, char trail) { int offset = getSurrogateOffset(lead, trail); if (offset > 0) { return m_data_[offset]; } return m_initialValue_; }

Get a value from a folding offset (from the value of a lead surrogate) and a trail surrogate.

If the

Params:
  • leadvalue – value associated with the lead surrogate which contains the folding offset
  • trail – surrogate
Returns:trie data value associated with the trail character
@draft2.1
/** * <p>Get a value from a folding offset (from the value of a lead surrogate) * and a trail surrogate.</p> * <p>If the * @param leadvalue value associated with the lead surrogate which contains * the folding offset * @param trail surrogate * @return trie data value associated with the trail character * @draft 2.1 */
public final char getTrailValue(int leadvalue, char trail) { if (m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } int offset = m_dataManipulate_.getFoldingOffset(leadvalue); if (offset > 0) { return m_data_[getRawOffset(offset, (char)(trail & SURROGATE_MASK_))]; } return m_initialValue_; } // protected methods -----------------------------------------------

Parses the input stream and stores its trie content into a index and data array

Params:
  • inputStream – data input stream containing trie data
Throws:
/** * <p>Parses the input stream and stores its trie content into a index and * data array</p> * @param inputStream data input stream containing trie data * @exception IOException thrown when data reading fails */
protected final void unserialize(InputStream inputStream) throws IOException { DataInputStream input = new DataInputStream(inputStream); int indexDataLength = m_dataOffset_ + m_dataLength_; m_index_ = new char[indexDataLength]; for (int i = 0; i < indexDataLength; i ++) { m_index_[i] = input.readChar(); } m_data_ = m_index_; m_initialValue_ = m_data_[m_dataOffset_]; }
Gets the offset to the data which the surrogate pair points to.
Params:
  • lead – lead surrogate
  • trail – trailing surrogate
Returns:offset to data
@draft2.1
/** * Gets the offset to the data which the surrogate pair points to. * @param lead lead surrogate * @param trail trailing surrogate * @return offset to data * @draft 2.1 */
protected final int getSurrogateOffset(char lead, char trail) { if (m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } // get fold position for the next trail surrogate int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); // get the real data from the folded lead/trail units if (offset > 0) { return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); } // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return -1; }
Gets the value at the argument index. For use internally in TrieIterator.
Params:
  • index – value at index will be retrieved
See Also:
  • TrieIterator
Returns:32 bit value
@draft2.1
/** * Gets the value at the argument index. * For use internally in TrieIterator. * @param index value at index will be retrieved * @return 32 bit value * @see com.ibm.icu.impl.TrieIterator * @draft 2.1 */
protected final int getValue(int index) { return m_data_[index]; }
Gets the default initial value
Returns:32 bit value
@draft2.1
/** * Gets the default initial value * @return 32 bit value * @draft 2.1 */
protected final int getInitialValue() { return m_initialValue_; } // private data members --------------------------------------------
Default value
/** * Default value */
private char m_initialValue_;
Array of char data
/** * Array of char data */
private char m_data_[];
Agent for friends
/** * Agent for friends */
private FriendAgent m_friendAgent_; }