Dependencies
Declarations
-
org
-
apache
-
lucene
-
analysis
-
ja
-
GraphvizFormatter
-
sb: StringBuilder
-
BOS_LABEL: String
-
EOS_LABEL: String
-
FONT_NAME: String
-
costs: ConnectionCosts
-
bestPathMap: Map<String, String>
-
GraphvizFormatter(ConnectionCosts): void
-
finish(): String
-
onBacktrace(JapaneseTokenizer, WrappedPositionArray, int, Position, int, char[], boolean): void
-
setBestPathMap(WrappedPositionArray, int, Position, int): void
-
formatNodes(JapaneseTokenizer, WrappedPositionArray, int, Position, char[]): String
-
formatHeader(): String
-
formatTrailer(): String
-
getNodeID(int, int): String
-
-
JapaneseAnalyzer
-
mode: Mode
-
stoptags: Set<String>
-
userDict: UserDictionary
-
JapaneseAnalyzer(): void
-
JapaneseAnalyzer(UserDictionary, Mode, CharArraySet, Set<String>): void
-
getDefaultStopSet(): CharArraySet
-
getDefaultStopTags(): Set<String>
-
DefaultSetHolder
-
createComponents(String): TokenStreamComponents
-
normalize(String, TokenStream): TokenStream
-
-
JapaneseBaseFormFilter
-
JapaneseBaseFormFilterFactory
-
JapaneseIterationMarkCharFilter
-
NORMALIZE_KANJI_DEFAULT: boolean
-
NORMALIZE_KANA_DEFAULT: boolean
-
KANJI_ITERATION_MARK: char
-
HIRAGANA_ITERATION_MARK: char
-
HIRAGANA_VOICED_ITERATION_MARK: char
-
KATAKANA_ITERATION_MARK: char
-
KATAKANA_VOICED_ITERATION_MARK: char
-
FULL_STOP_PUNCTUATION: char
-
h2d: char[]
-
k2d: char[]
-
buffer: RollingCharBuffer
-
bufferPosition: int
-
iterationMarksSpanSize: int
-
iterationMarkSpanEndPosition: int
-
normalizeKanji: boolean
-
normalizeKana: boolean
-
static class initializer
-
JapaneseIterationMarkCharFilter(Reader): void
-
JapaneseIterationMarkCharFilter(Reader, boolean, boolean): void
-
read(char[], int, int): int
-
read(): int
-
normalizeIterationMark(char): char
-
nextIterationMarkSpanSize(): int
-
sourceCharacter(int, int): char
-
normalize(char, char): char
-
normalizedHiragana(char, char): char
-
normalizedKatakana(char, char): char
-
isIterationMark(char): boolean
-
isHiraganaIterationMark(char): boolean
-
isKatakanaIterationMark(char): boolean
-
isKanjiIterationMark(char): boolean
-
lookupHiraganaDakuten(char): char
-
lookupKatakanaDakuten(char): char
-
isHiraganaDakuten(char): boolean
-
isKatakanaDakuten(char): boolean
-
lookup(char, char[], char): char
-
inside(char, char[], char): boolean
-
correct(int): int
-
-
JapaneseIterationMarkCharFilterFactory
-
JapaneseKatakanaStemFilter
-
DEFAULT_MINIMUM_LENGTH: int
-
HIRAGANA_KATAKANA_PROLONGED_SOUND_MARK: char
-
termAttr: CharTermAttribute
-
keywordAttr: KeywordAttribute
-
minimumKatakanaLength: int
-
JapaneseKatakanaStemFilter(TokenStream, int): void
-
JapaneseKatakanaStemFilter(TokenStream): void
-
incrementToken(): boolean
-
stem(char[], int): int
-
isKatakana(char[], int): boolean
-
-
JapaneseKatakanaStemFilterFactory
-
JapaneseNumberFilter
-
termAttr: CharTermAttribute
-
offsetAttr: OffsetAttribute
-
keywordAttr: KeywordAttribute
-
posIncrAttr: PositionIncrementAttribute
-
posLengthAttr: PositionLengthAttribute
-
NO_NUMERAL: char
-
numerals: char[]
-
exponents: char[]
-
state: State
-
numeral: StringBuilder
-
fallThroughTokens: int
-
exhausted: boolean
-
static class initializer
-
JapaneseNumberFilter(TokenStream): void
-
incrementToken(): boolean
-
reset(): void
-
normalizeNumber(String): String
-
parseNumber(NumberBuffer): BigDecimal
-
parseLargePair(NumberBuffer): BigDecimal
-
parseMediumNumber(NumberBuffer): BigDecimal
-
parseMediumPair(NumberBuffer): BigDecimal
-
parseBasicNumber(NumberBuffer): BigDecimal
-
parseLargeKanjiNumeral(NumberBuffer): BigDecimal
-
parseMediumKanjiNumeral(NumberBuffer): BigDecimal
-
isNumeral(String): boolean
-
isNumeral(char): boolean
-
isNumeralPunctuation(String): boolean
-
isNumeralPunctuation(char): boolean
-
isArabicNumeral(char): boolean
-
isHalfWidthArabicNumeral(char): boolean
-
isFullWidthArabicNumeral(char): boolean
-
arabicNumeralValue(char): int
-
isKanjiNumeral(char): boolean
-
kanjiNumeralValue(char): int
-
isDecimalPoint(char): boolean
-
isThousandSeparator(char): boolean
-
NumberBuffer
-
-
JapaneseNumberFilterFactory
-
JapanesePartOfSpeechStopFilter
-
JapanesePartOfSpeechStopFilterFactory
-
JapaneseReadingFormFilter
-
JapaneseReadingFormFilterFactory
-
JapaneseTokenizer
-
Mode
-
DEFAULT_MODE: Mode
-
Type
-
VERBOSE: boolean
-
SEARCH_MODE_KANJI_LENGTH: int
-
SEARCH_MODE_OTHER_LENGTH: int
-
SEARCH_MODE_KANJI_PENALTY: int
-
SEARCH_MODE_OTHER_PENALTY: int
-
MAX_UNKNOWN_WORD_LENGTH: int
-
MAX_BACKTRACE_GAP: int
-
dictionaryMap: EnumMap<Type, Dictionary>
-
fst: TokenInfoFST
-
dictionary: TokenInfoDictionary
-
unkDictionary: UnknownDictionary
-
costs: ConnectionCosts
-
userDictionary: UserDictionary
-
characterDefinition: CharacterDefinition
-
arc: Arc<Long>
-
fstReader: BytesReader
-
wordIdRef: IntsRef
-
userFSTReader: BytesReader
-
userFST: TokenInfoFST
-
buffer: RollingCharBuffer
-
positions: WrappedPositionArray
-
discardPunctuation: boolean
-
searchMode: boolean
-
extendedMode: boolean
-
outputCompounds: boolean
-
outputNBest: boolean
-
nBestCost: int
-
end: boolean
-
lastBackTracePos: int
-
lastTokenPos: int
-
pos: int
-
pending: List<Token>
-
termAtt: CharTermAttribute
-
offsetAtt: OffsetAttribute
-
posIncAtt: PositionIncrementAttribute
-
posLengthAtt: PositionLengthAttribute
-
basicFormAtt: BaseFormAttribute
-
posAtt: PartOfSpeechAttribute
-
readingAtt: ReadingAttribute
-
inflectionAtt: InflectionAttribute
-
JapaneseTokenizer(UserDictionary, boolean, Mode): void
-
JapaneseTokenizer(UserDictionary, boolean, boolean, Mode): void
-
JapaneseTokenizer(AttributeFactory, UserDictionary, boolean, Mode): void
-
JapaneseTokenizer(AttributeFactory, UserDictionary, boolean, boolean, Mode): void
-
JapaneseTokenizer(AttributeFactory, TokenInfoDictionary, UnknownDictionary, ConnectionCosts, UserDictionary, boolean, boolean, Mode): void
-
dotOut: GraphvizFormatter
-
setGraphvizFormatter(GraphvizFormatter): void
-
close(): void
-
reset(): void
-
resetState(): void
-
end(): void
-
computeSecondBestThreshold(int, int): int
-
computePenalty(int, int): int
-
Position
-
pos: int
-
count: int
-
costs: int[]
-
lastRightID: int[]
-
backPos: int[]
-
backIndex: int[]
-
backID: int[]
-
backType: Type[]
-
forwardCount: int
-
forwardPos: int[]
-
forwardID: int[]
-
forwardIndex: int[]
-
forwardType: Type[]
-
grow(): void
-
growForward(): void
-
add(int, int, int, int, int, Type): void
-
addForward(int, int, int, Type): void
-
reset(): void
-
-
add(Dictionary, Position, int, int, Type, boolean): void
-
incrementToken(): boolean
-
WrappedPositionArray
-
parse(): void
-
pruneAndRescore(int, int, int): void
-
Lattice
-
nodeLeft: int[]
-
fragment: char[]
-
dictionaryMap: EnumMap<Type, Dictionary>
-
useEOS: boolean
-
rootCapacity: int
-
rootSize: int
-
rootBase: int
-
lRoot: int[]
-
rRoot: int[]
-
capacity: int
-
nodeCount: int
-
nodeDicType: Type[]
-
nodeWordID: int[]
-
nodeMark: int[]
-
nodeLeftID: int[]
-
nodeRightID: int[]
-
nodeWordCost: int[]
-
nodeLeftCost: int[]
-
nodeRightCost: int[]
-
nodeLeftNode: int[]
-
nodeRightNode: int[]
-
nodeRight: int[]
-
nodeLeftChain: int[]
-
nodeRightChain: int[]
-
setupRoot(int, int): void
-
reserve(int): void
-
setupNodePool(int): void
-
addNode(Type, int, int, int): int
-
positionCount(WrappedPositionArray, int, int): int
-
setup(char[], EnumMap<Type, Dictionary>, WrappedPositionArray, int, int, boolean): void
-
markUnreachable(): void
-
connectionCost(ConnectionCosts, int, int): int
-
calcLeftCost(ConnectionCosts): void
-
calcRightCost(ConnectionCosts): void
-
markSameSpanNode(int, int): void
-
bestPathNodeList(): List<Integer>
-
cost(int): int
-
nBestNodeList(int): List<Integer>
-
bestCost(): int
-
probeDelta(int, int): int
-
debugPrint(): void
-
-
lattice: Lattice
-
registerNode(int, char[]): void
-
fixupPendingList(): void
-
probeDelta(String, String): int
-
calcNBestCost(String): int
-
setNBestCost(int): void
-
backtraceNBest(Position, boolean): void
-
backtrace(Position, int): void
-
getDict(Type): Dictionary
-
isPunctuation(char): boolean
-
-
JapaneseTokenizerFactory
-
NAME: String
-
MODE: String
-
USER_DICT_PATH: String
-
USER_DICT_ENCODING: String
-
DISCARD_PUNCTUATION: String
-
DISCARD_COMPOUND_TOKEN: String
-
NBEST_COST: String
-
NBEST_EXAMPLES: String
-
userDictionary: UserDictionary
-
mode: Mode
-
discardPunctuation: boolean
-
discardCompoundToken: boolean
-
userDictionaryPath: String
-
userDictionaryEncoding: String
-
nbestExamples: String
-
nbestCost: int
-
JapaneseTokenizerFactory(Map<String, String>): void
-
inform(ResourceLoader): void
-
create(AttributeFactory): JapaneseTokenizer
-
-
Token
-
dictionary: Dictionary
-
wordId: int
-
surfaceForm: char[]
-
offset: int
-
length: int
-
position: int
-
positionLength: int
-
type: Type
-
Token(int, char[], int, int, Type, int, Dictionary): void
-
toString(): String
-
getSurfaceForm(): char[]
-
getOffset(): int
-
getLength(): int
-
getSurfaceFormString(): String
-
getReading(): String
-
getPronunciation(): String
-
getPartOfSpeech(): String
-
getInflectionType(): String
-
getInflectionForm(): String
-
getBaseForm(): String
-
getType(): Type
-
isKnown(): boolean
-
isUnknown(): boolean
-
isUser(): boolean
-
getPosition(): int
-
setPositionLength(int): void
-
getPositionLength(): int
-
-
dict
-
BinaryDictionary
-
ResourceScheme
-
DICT_FILENAME_SUFFIX: String
-
TARGETMAP_FILENAME_SUFFIX: String
-
POSDICT_FILENAME_SUFFIX: String
-
DICT_HEADER: String
-
TARGETMAP_HEADER: String
-
POSDICT_HEADER: String
-
VERSION: int
-
resourceScheme: ResourceScheme
-
resourcePath: String
-
buffer: ByteBuffer
-
targetMapOffsets: int[]
-
targetMap: int[]
-
posDict: String[]
-
inflTypeDict: String[]
-
inflFormDict: String[]
-
BinaryDictionary(): void
-
BinaryDictionary(ResourceScheme, String): void
-
getResource(String): InputStream
-
getResource(ResourceScheme, String): InputStream
-
getClassResource(Class<Object>, String): InputStream
-
getClassResource(String): InputStream
-
lookupWordIds(int, IntsRef): void
-
getLeftId(int): int
-
getRightId(int): int
-
getWordCost(int): int
-
getBaseForm(int, char[], int, int): String
-
getReading(int, char[], int, int): String
-
getPartOfSpeech(int): String
-
getPronunciation(int, char[], int, int): String
-
getInflectionType(int): String
-
getInflectionForm(int): String
-
baseFormOffset(int): int
-
readingOffset(int): int
-
pronunciationOffset(int): int
-
hasBaseFormData(int): boolean
-
hasReadingData(int): boolean
-
hasPronunciationData(int): boolean
-
readString(int, int, boolean): String
-
HAS_BASEFORM: int
-
HAS_READING: int
-
HAS_PRONUNCIATION: int
-
-
CharacterDefinition
-
SYMBOL: byte
-
NUMERIC: byte
-
FILENAME_SUFFIX: String
-
HEADER: String
-
VERSION: int
-
CLASS_COUNT: int
-
CharacterClass
-
characterCategoryMap: byte[]
-
invokeMap: boolean[]
-
groupMap: boolean[]
-
NGRAM: byte
-
DEFAULT: byte
-
SPACE: byte
-
ALPHA: byte
-
CYRILLIC: byte
-
GREEK: byte
-
HIRAGANA: byte
-
KATAKANA: byte
-
KANJI: byte
-
KANJINUMERIC: byte
-
CharacterDefinition(): void
-
getCharacterClass(char): byte
-
isInvoke(char): boolean
-
isGroup(char): boolean
-
isKanji(char): boolean
-
lookupCharacterClass(String): byte
-
getInstance(): CharacterDefinition
-
SingletonHolder
-
-
ConnectionCosts
-
Dictionary
-
INTERNAL_SEPARATOR: String
-
getLeftId(int): int
-
getRightId(int): int
-
getWordCost(int): int
-
getPartOfSpeech(int): String
-
getReading(int, char[], int, int): String
-
getBaseForm(int, char[], int, int): String
-
getPronunciation(int, char[], int, int): String
-
getInflectionType(int): String
-
getInflectionForm(int): String
-
-
TokenInfoDictionary
-
TokenInfoFST
-
UnknownDictionary
-
characterDefinition: CharacterDefinition
-
UnknownDictionary(ResourceScheme, String): void
-
UnknownDictionary(): void
-
lookup(char[], int, int): int
-
getCharacterDefinition(): CharacterDefinition
-
getReading(int, char[], int, int): String
-
getInflectionType(int): String
-
getInflectionForm(int): String
-
getInstance(): UnknownDictionary
-
SingletonHolder
-
-
UserDictionary
-
fst: TokenInfoFST
-
segmentations: int[][]
-
data: String[]
-
CUSTOM_DICTIONARY_WORD_ID_OFFSET: int
-
WORD_COST: int
-
LEFT_ID: int
-
RIGHT_ID: int
-
open(Reader): UserDictionary
-
UserDictionary(List<String[]>): void
-
lookup(char[], int, int): int[][]
-
getFST(): TokenInfoFST
-
EMPTY_RESULT: int[][]
-
toIndexArray(Map<Integer, int[]>): int[][]
-
lookupSegmentation(int): int[]
-
getLeftId(int): int
-
getRightId(int): int
-
getWordCost(int): int
-
getReading(int, char[], int, int): String
-
getPartOfSpeech(int): String
-
getBaseForm(int, char[], int, int): String
-
getPronunciation(int, char[], int, int): String
-
getInflectionType(int): String
-
getInflectionForm(int): String
-
getAllFeaturesArray(int): String[]
-
getFeature(int, int[]): String
-
-
- tokenattributes
-
util
-
BinaryDictionaryWriter
-
ID_LIMIT: int
-
implClazz: Class<BinaryDictionary>
-
buffer: ByteBuffer
-
targetMapEndOffset: int
-
lastWordId: int
-
lastSourceId: int
-
targetMap: int[]
-
targetMapOffsets: int[]
-
posDict: ArrayList<String>
-
BinaryDictionaryWriter(Class<BinaryDictionary>, int): void
-
put(String[]): int
-
isKatakana(String): boolean
-
writeKatakana(String): void
-
toKatakana(String): String
-
sharedPrefix(String, String): int
-
addMapping(int, int): void
-
getBaseFileName(): String
-
write(Path): void
-
writeTargetMap(Path): void
-
writePosDict(Path): void
-
writeDictionary(Path): void
-
-
CSVUtil
-
CharacterDefinitionWriter
-
ConnectionCostsBuilder
-
ConnectionCostsWriter
-
DictionaryBuilder
-
ToStringUtil
-
posTranslations: HashMap<String, String>
-
static class initializer
-
getPOSTranslation(String): String
-
inflTypeTranslations: HashMap<String, String>
-
static class initializer
-
getInflectionTypeTranslation(String): String
-
inflFormTranslations: HashMap<String, String>
-
static class initializer
-
getInflectedFormTranslation(String): String
-
getRomanization(String): String
-
getRomanization(Appendable, CharSequence): void
-
-
TokenInfoDictionaryBuilder
-
TokenInfoDictionaryWriter
-
UnknownDictionaryBuilder
-
UnknownDictionaryWriter
-
-
-
ja
-
analysis
-
lucene
-
apache