/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search.uhighlight;

import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.ByteRunAutomaton;

Associates a label with a CharArrayMatcher to distinguish different sources for terms in highlighting
@lucene.internal
/** * Associates a label with a CharArrayMatcher to distinguish different sources for terms in highlighting * * @lucene.internal */
public interface LabelledCharArrayMatcher extends CharArrayMatcher {
Returns:the label for this matcher
/** * @return the label for this matcher */
String getLabel();
Associates a label with a CharArrayMatcher
/** * Associates a label with a CharArrayMatcher */
static LabelledCharArrayMatcher wrap(String label, CharArrayMatcher in) { return new LabelledCharArrayMatcher() { @Override public String getLabel() { return label; } @Override public boolean match(char[] s, int offset, int length) { return in.match(s, offset, length); } }; }
Returns a representation of the automaton that matches char[] instead of byte[]
/** * Returns a representation of the automaton that matches char[] instead of byte[] */
static LabelledCharArrayMatcher wrap(String label, ByteRunAutomaton runAutomaton) { return wrap(label, (chars, offset, length) -> { int state = 0; final int maxIdx = offset + length; for (int i = offset; i < maxIdx; i++) { final int code = chars[i]; int b; // UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 ) if (code < 0x80) { state = runAutomaton.step(state, code); if (state == -1) return false; } else if (code < 0x800) { b = (0xC0 | (code >> 6)); state = runAutomaton.step(state, b); if (state == -1) return false; b = (0x80 | (code & 0x3F)); state = runAutomaton.step(state, b); if (state == -1) return false; } else { // more complex byte[] utf8Bytes = new byte[4 * (maxIdx - i)]; int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes); for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) { state = runAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF); if (state == -1) return false; } break; } } return runAutomaton.isAccept(state); }); } }