/*
* Copyright (c) 2020, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.js.runtime.util;
import java.util.regex.Pattern;
// Validation of patterns from Unicode Technical Standard #35: UNICODE LOCALE DATA MARKUP LANGUAGE.
// https://unicode.org/reports/tr35/
/**
 * Syntax checks for locale identifiers and their subtags, following the grammar of Unicode
 * Technical Standard #35 (Unicode Locale Data Markup Language), https://unicode.org/reports/tr35/
 *
 * <p>
 * All checks are purely structural: they test the shape of the input (lengths and ASCII
 * character classes) and never consult a registry of known languages, scripts or regions.
 */
public class UTS35Validator {

    /** Compiled once at class initialization; describes a complete {@code unicode_locale_id}. */
    private static final Pattern LOCALE_ID_PATTERN = Pattern.compile(buildUnicodeLocaleIdRegex());

    /**
     * Tests whether the whole string has the shape of a Unicode BCP 47 locale identifier.
     *
     * <p>
     * Only {@code '-'} is accepted as subtag separator, and the identifier has to begin with a
     * language subtag (neither {@code "root"} nor a leading script subtag is accepted).
     *
     * @param languageTag the candidate identifier; must not be {@code null}
     * @return {@code true} if the entire input matches the {@code unicode_locale_id} grammar
     */
    public static boolean isWellFormedUnicodeBCP47LocaleIdentifier(String languageTag) {
        return LOCALE_ID_PATTERN.matcher(languageTag).matches();
    }

    /**
     * @param c the character to classify
     * @return {@code true} if {@code c} is an ASCII digit ({@code digit = [0-9]})
     */
    public static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * @param c the character to classify
     * @return {@code true} if {@code c} is an ASCII letter ({@code alpha = [A-Z a-z]})
     */
    public static boolean isAlpha(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * @param c the character to classify
     * @return {@code true} if {@code c} is an ASCII letter or digit
     *         ({@code alphanum = [0-9 A-Z a-z]})
     */
    public static boolean isAlphanum(char c) {
        return isDigit(c) || isAlpha(c);
    }

    /**
     * Checks {@code unicode_language_subtag = alpha{2,3} | alpha{5,8}}.
     *
     * @param language the candidate subtag; must not be {@code null}
     * @return {@code true} if the input consists of 2, 3 or 5 to 8 ASCII letters
     */
    public static boolean isStructurallyValidLanguageSubtag(String language) {
        int n = language.length();
        boolean acceptedLength = n == 2 || n == 3 || (n >= 5 && n <= 8);
        return acceptedLength && allAlpha(language);
    }

    /**
     * Checks {@code unicode_region_subtag = (alpha{2} | digit{3})}.
     *
     * @param region the candidate subtag; must not be {@code null}
     * @return {@code true} if the input is exactly two ASCII letters or exactly three ASCII digits
     */
    public static boolean isStructurallyValidRegionSubtag(String region) {
        switch (region.length()) {
            case 2:
                return allAlpha(region);
            case 3:
                return allDigits(region);
            default:
                return false;
        }
    }

    /**
     * Checks {@code unicode_script_subtag = alpha{4}}.
     *
     * @param script the candidate subtag; must not be {@code null}
     * @return {@code true} if the input is exactly four ASCII letters
     */
    public static boolean isStructurallyValidScriptSubtag(String script) {
        return script.length() == 4 && allAlpha(script);
    }

    /**
     * Checks {@code type = alphanum{3,8} (sep alphanum{3,8})*}.
     *
     * <p>
     * Unlike {@link #isWellFormedUnicodeBCP47LocaleIdentifier(String)}, this check accepts both
     * {@code '-'} and {@code '_'} as separator.
     *
     * @param type the candidate type; must not be {@code null}
     * @return {@code true} if the input is one or more separator-delimited runs of 3 to 8 ASCII
     *         letters or digits
     */
    public static boolean isStructurallyValidType(String type) {
        // Number of alphanumeric characters seen since the last separator (or the start).
        int runLength = 0;
        final int end = type.length();
        for (int pos = 0; pos < end; pos++) {
            char current = type.charAt(pos);
            if (isAlphanum(current)) {
                runLength++;
                continue;
            }
            boolean isSeparator = current == '-' || current == '_';
            if (!isSeparator || !isValidTypeRunLength(runLength)) {
                // Either an unexpected character or a run that is not alphanum{3,8}.
                return false;
            }
            runLength = 0;
        }
        // The final run has no trailing separator and is checked here.
        return isValidTypeRunLength(runLength);
    }

    /** Length constraint of a single alphanumeric run inside a type: 3 to 8 characters. */
    private static boolean isValidTypeRunLength(int runLength) {
        return runLength >= 3 && runLength <= 8;
    }

    /** Returns {@code true} if every character of {@code s} is an ASCII letter. */
    private static boolean allAlpha(String s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isAlpha(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns {@code true} if every character of {@code s} is an ASCII digit. */
    private static boolean allDigits(String s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isDigit(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Assembles the regular expression for {@code unicode_locale_id} bottom-up from the grammar
     * productions of UTS #35. Every grouping is non-capturing.
     *
     * @return the regular expression source for a complete locale identifier
     */
    private static String buildUnicodeLocaleIdRegex() {
        // sep = [-_]; '_' is backwards compatibility syntax that is not allowed in a
        // Unicode BCP 47 locale identifier, hence only '-' here.
        final String sep = "-";
        // digit = [0-9]
        final String digit = "[0-9]";
        // alpha = [A-Z a-z]
        final String alpha = "[A-Za-z]";
        // alphanum = [0-9 A-Z a-z]
        final String alphanum = "[0-9A-Za-z]";

        // unicode_language_subtag = alpha{2,3} | alpha{5,8}
        final String languageSubtag = nonCapturing(alpha + "{2,3}|" + alpha + "{5,8}");
        // unicode_script_subtag = alpha{4}
        final String scriptSubtag = alpha + "{4}";
        // unicode_region_subtag = (alpha{2} | digit{3})
        final String regionSubtag = nonCapturing(alpha + "{2}|" + digit + "{3}");
        // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
        final String variantSubtag = nonCapturing(alphanum + "{5,8}|" + digit + alphanum + "{3}");

        // unicode_language_id = "root" | (unicode_language_subtag (sep unicode_script_subtag)? |
        // unicode_script_subtag) (sep unicode_region_subtag)? (sep unicode_variant_subtag)*
        // "root" and identifiers starting with a script subtag are backwards compatibility
        // syntax (not allowed in a Unicode BCP 47 locale identifier) and are left out.
        final String languageId = nonCapturing(
                        languageSubtag + optional(sep + scriptSubtag) + optional(sep + regionSubtag) + zeroOrMore(sep + variantSubtag));

        // tlang = unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)?
        // (sep unicode_variant_subtag)* -- the same expression as the restricted language id.
        final String tlang = languageId;

        // attribute = alphanum{3,8}
        final String attribute = alphanum + "{3,8}";
        // type = alphanum{3,8} (sep alphanum{3,8})*
        final String type = nonCapturing(alphanum + "{3,8}" + zeroOrMore(sep + alphanum + "{3,8}"));
        // key = alphanum alpha
        final String key = nonCapturing(alphanum + alpha);
        // keyword = key (sep type)?
        final String keyword = nonCapturing(key + optional(sep + type));

        // unicode_locale_extensions = sep [uU] ((sep keyword)+ | (sep attribute)+ (sep keyword)*)
        final String unicodeLocaleExtensions = nonCapturing(
                        sep + "[uU]" + nonCapturing(oneOrMore(sep + keyword) + "|" + oneOrMore(sep + attribute) + zeroOrMore(sep + keyword)));

        // tkey = alpha digit
        final String tkey = nonCapturing(alpha + digit);
        // tvalue = (sep alphanum{3,8})+
        final String tvalue = oneOrMore(sep + alphanum + "{3,8}");
        // tfield = tkey tvalue
        final String tfield = nonCapturing(tkey + tvalue);

        // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) | (sep tfield)+)
        final String transformedExtensions = nonCapturing(
                        sep + "[tT]" + nonCapturing(nonCapturing(sep + tlang + zeroOrMore(sep + tfield)) + "|" + oneOrMore(sep + tfield)));

        // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+
        final String otherExtensions = nonCapturing(sep + "[0-9a-svwyzA-SVWYZ]" + oneOrMore(sep + alphanum + "{2,8}"));

        // extensions = unicode_locale_extensions | transformed_extensions | other_extensions
        final String extensions = nonCapturing(unicodeLocaleExtensions + "|" + transformedExtensions + "|" + otherExtensions);

        // pu_extensions = sep [xX] (sep alphanum{1,8})+
        final String puExtensions = nonCapturing(sep + "[xX]" + oneOrMore(sep + alphanum + "{1,8}"));

        // unicode_locale_id = unicode_language_id extensions* pu_extensions?
        // 'extensions' and 'puExtensions' are already groups, so the quantifiers attach directly.
        return nonCapturing(languageId + extensions + "*" + puExtensions + "?");
    }

    /** Wraps {@code expression} in a non-capturing group. */
    private static String nonCapturing(String expression) {
        return "(?:" + expression + ")";
    }

    /** Non-capturing group around {@code expression} that may occur at most once. */
    private static String optional(String expression) {
        return nonCapturing(expression) + "?";
    }

    /** Non-capturing group around {@code expression} that may occur any number of times. */
    private static String zeroOrMore(String expression) {
        return nonCapturing(expression) + "*";
    }

    /** Non-capturing group around {@code expression} that has to occur at least once. */
    private static String oneOrMore(String expression) {
        return nonCapturing(expression) + "+";
    }
}