/*
* Copyright 2008-present MongoDB, Inc.
* Copyright 2017 Tom Bentley
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.internal.authentication;
import java.nio.CharBuffer;
import java.text.Normalizer;
/**
 * Utility class for Sasl string preparation.
 *
 * <p>This class should not be considered a part of the public API.</p>
 */
public final class SaslPrep {

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a query string.
     * This implements the {@code SASLPrep} algorithm defined in <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>.
     * Unassigned code points are permitted in query strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the requirements for bidirectional
     *                                  character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for discussion of what a query string is.
     */
    public static String saslPrepQuery(final String str) {
        return saslPrep(str, true);
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a stored string.
     * This implements the {@code SASLPrep} algorithm defined in <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>.
     * Unassigned code points are rejected in stored strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited or unassigned codepoints, or broke the requirements
     *                                  for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for discussion of what a stored string is.
     */
    public static String saslPrepStored(final String str) {
        return saslPrep(str, false);
    }

    /**
     * Run the four SASLprep steps (map, normalize, prohibit, check bidi) over {@code str}.
     *
     * @param str             the string to canonicalise
     * @param allowUnassigned {@code true} to accept code points for which {@link Character#isDefined(int)} is false
     * @return the NFKC-normalized, validated string
     * @throws IllegalArgumentException if a prohibited or (when not allowed) unassigned code point is found, or the
     *                                  bidirectional rules of RFC 3454, Section 6 are violated
     */
    private static String saslPrep(final String str, final boolean allowUnassigned) {
        final char[] chars = str.toCharArray();

        // 1. Map: non-ASCII spaces become ' ', "mapped to nothing" characters are dropped.
        // The space mapping is applied first, so U+200B (listed in both tables) ends up as a space.
        int length = 0;
        for (int i = 0; i < chars.length; i++) {
            final char ch = nonAsciiSpace(chars[i]) ? ' ' : chars[i];
            if (!mappedToNothing(ch)) {
                chars[length++] = ch;
            }
        }

        // 2. Normalize
        final String normalized = Normalizer.normalize(CharBuffer.wrap(chars, 0, length), Normalizer.Form.NFKC);

        boolean containsRandALCat = false;
        boolean containsLCat = false;
        boolean firstIsRandALCat = false;
        boolean lastIsRandALCat = false;
        for (int i = 0; i < normalized.length();) {
            final int codepoint = normalized.codePointAt(i);

            // 3. Prohibit
            if (prohibited(codepoint)) {
                throw new IllegalArgumentException("Prohibited character at position " + i);
            }
            if (!allowUnassigned && !Character.isDefined(codepoint)) {
                throw new IllegalArgumentException("Character at position " + i + " is unassigned");
            }

            // 4. Check bidi: gather the facts here, enforce the rules once the whole string has been scanned.
            final byte directionality = Character.getDirectionality(codepoint);
            final boolean isRandALCat = directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT
                    || directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
            if (i == 0) {
                firstIsRandALCat = isRandALCat;
            }
            lastIsRandALCat = isRandALCat;
            containsRandALCat |= isRandALCat;
            containsLCat |= directionality == Character.DIRECTIONALITY_LEFT_TO_RIGHT;

            i += Character.charCount(codepoint);
        }

        // RFC 3454, Section 6: a string containing any RandALCat character must contain no LCat character and
        // must both start and end with a RandALCat character.
        if (firstIsRandALCat && !lastIsRandALCat) {
            throw new IllegalArgumentException("First character is RandALCat, but last character is not");
        }
        if (containsRandALCat && containsLCat) {
            throw new IllegalArgumentException("Contains both RandALCat characters and LCat characters");
        }
        if (containsRandALCat && !firstIsRandALCat) {
            throw new IllegalArgumentException("Contains RandALCat characters, but first character is not RandALCat");
        }
        return normalized;
    }

    /**
     * Return true if the given {@code codepoint} is a prohibited character as defined by
     * <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013, Section 2.3</a>.
     */
    static boolean prohibited(final int codepoint) {
        // The full int code point is passed to every table; narrowing to char would make supplementary
        // code points alias BMP entries (for example U+1000A would be seen as U+000A).
        return nonAsciiSpace(codepoint)
                || asciiControl(codepoint)
                || nonAsciiControl(codepoint)
                || privateUse(codepoint)
                || nonCharacterCodePoint(codepoint)
                || surrogateCodePoint(codepoint)
                || inappropriateForPlainText(codepoint)
                || inappropriateForCanonical(codepoint)
                || changeDisplayProperties(codepoint)
                || tagging(codepoint);
    }

    /**
     * Return true if the given {@code codepoint} is a tagging character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.9">RFC 3454, Appendix C.9</a>.
     */
    private static boolean tagging(final int codepoint) {
        return codepoint == 0xE0001
                || 0xE0020 <= codepoint && codepoint <= 0xE007F;
    }

    /**
     * Return true if the given {@code codepoint} is change display properties or deprecated characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.8">RFC 3454, Appendix C.8</a>.
     */
    private static boolean changeDisplayProperties(final int codepoint) {
        return codepoint == 0x0340
                || codepoint == 0x0341
                || codepoint == 0x200E
                || codepoint == 0x200F
                || 0x202A <= codepoint && codepoint <= 0x202E
                || 0x206A <= codepoint && codepoint <= 0x206F;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for canonical representation characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.7">RFC 3454, Appendix C.7</a>.
     */
    private static boolean inappropriateForCanonical(final int codepoint) {
        return 0x2FF0 <= codepoint && codepoint <= 0x2FFB;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for plain text characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.6">RFC 3454, Appendix C.6</a>.
     */
    private static boolean inappropriateForPlainText(final int codepoint) {
        return 0xFFF9 <= codepoint && codepoint <= 0xFFFD;
    }

    /**
     * Return true if the given {@code codepoint} is a surrogate code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.5">RFC 3454, Appendix C.5</a>.
     */
    private static boolean surrogateCodePoint(final int codepoint) {
        return 0xD800 <= codepoint && codepoint <= 0xDFFF;
    }

    /**
     * Return true if the given {@code codepoint} is a non-character code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.4">RFC 3454, Appendix C.4</a>.
     */
    private static boolean nonCharacterCodePoint(final int codepoint) {
        if (0xFDD0 <= codepoint && codepoint <= 0xFDEF) {
            return true;
        }
        // The remaining entries are the last two code points (xFFFE and xFFFF) of each of the planes 0 to 16.
        return 0 <= codepoint && codepoint <= Character.MAX_CODE_POINT
                && (codepoint & 0xFFFE) == 0xFFFE;
    }

    /**
     * Return true if the given {@code codepoint} is a private use character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.3">RFC 3454, Appendix C.3</a>.
     */
    private static boolean privateUse(final int codepoint) {
        return 0xE000 <= codepoint && codepoint <= 0xF8FF
                || 0xF0000 <= codepoint && codepoint <= 0xFFFFD
                || 0x100000 <= codepoint && codepoint <= 0x10FFFD;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII control character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.2">RFC 3454, Appendix C.2.2</a>.
     */
    private static boolean nonAsciiControl(final int codepoint) {
        return 0x0080 <= codepoint && codepoint <= 0x009F
                || codepoint == 0x06DD
                || codepoint == 0x070F
                || codepoint == 0x180E
                || codepoint == 0x200C
                || codepoint == 0x200D
                || codepoint == 0x2028
                || codepoint == 0x2029
                || 0x2060 <= codepoint && codepoint <= 0x2063
                || 0x206A <= codepoint && codepoint <= 0x206F
                || codepoint == 0xFEFF
                || 0xFFF9 <= codepoint && codepoint <= 0xFFFC
                || 0x1D173 <= codepoint && codepoint <= 0x1D17A;
    }

    /**
     * Return true if the given {@code codepoint} is an ASCII control character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.1">RFC 3454, Appendix C.2.1</a>.
     */
    private static boolean asciiControl(final int codepoint) {
        return 0x0000 <= codepoint && codepoint <= 0x001F
                || codepoint == 0x007F;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII space character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.1.2">RFC 3454, Appendix C.1.2</a>.
     */
    private static boolean nonAsciiSpace(final int codepoint) {
        return codepoint == 0x00A0
                || codepoint == 0x1680
                || 0x2000 <= codepoint && codepoint <= 0x200B
                || codepoint == 0x202F
                || codepoint == 0x205F
                || codepoint == 0x3000;
    }

    /**
     * Return true if the given {@code codepoint} is a "commonly mapped to nothing" character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-B.1">RFC 3454, Appendix B.1</a>.
     */
    private static boolean mappedToNothing(final int codepoint) {
        return codepoint == 0x00AD
                || codepoint == 0x034F
                || codepoint == 0x1806
                || 0x180B <= codepoint && codepoint <= 0x180D
                || 0x200B <= codepoint && codepoint <= 0x200D
                || codepoint == 0x2060
                || 0xFE00 <= codepoint && codepoint <= 0xFE0F
                || codepoint == 0xFEFF;
    }

    private SaslPrep() {
    }
}