/*
 * Copyright 2008-present MongoDB, Inc.
 * Copyright 2017 Tom Bentley
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.mongodb.internal.authentication;

import java.nio.CharBuffer;
import java.text.Normalizer;

/**
 * Utility class for SASL string preparation.
 *
 * <p>This class should not be considered a part of the public API.</p>
 */
public final class SaslPrep {

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a query string.
     * This implements the {@code SASLPrep} algorithm defined in <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>.
     * Unassigned code points are permitted in query strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the requirements for bidirectional
     *                                  character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for discussion of what a query string is.
     */
    public static String saslPrepQuery(final String str) {
        return saslPrep(str, true);
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a stored string.
     * This implements the {@code SASLPrep} algorithm defined in <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>.
     * Unassigned code points are rejected in stored strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited or unassigned codepoints, or broke the requirements
     *                                  for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for discussion of what a stored string is.
     */
    public static String saslPrepStored(final String str) {
        return saslPrep(str, false);
    }

    /**
     * Run the four SASLprep steps (map, normalize, prohibit, check bidi) over {@code str}.
     *
     * @param str             the string to canonicalise
     * @param allowUnassigned {@code true} to accept code points for which {@link Character#isDefined(int)} is false
     * @return the mapped and NFKC-normalized string
     * @throws IllegalArgumentException if a prohibited or (when disallowed) unassigned code point is found, or the
     *                                  bidirectional rules of RFC 3454, Section 6 are violated
     */
    private static String saslPrep(final String str, final boolean allowUnassigned) {
        final char[] chars = str.toCharArray();

        // 1. Map
        // Non-ASCII spaces become ' ' first; the "mapped to nothing" test is applied to the already
        // mapped character, so U+200B (present in both tables) ends up as a space, not removed.
        int length = 0;
        for (int i = 0; i < chars.length; i++) {
            final char mapped = nonAsciiSpace(chars[i]) ? ' ' : chars[i];
            if (!mappedToNothing(mapped)) {
                chars[length++] = mapped;
            }
        }

        // 2. Normalize
        final String normalized = Normalizer.normalize(CharBuffer.wrap(chars, 0, length), Normalizer.Form.NFKC);

        boolean containsRandALCat = false;
        boolean containsLCat = false;
        boolean initialRandALCat = false;
        for (int i = 0; i < normalized.length();) {
            final int codepoint = normalized.codePointAt(i);

            // 3. Prohibit
            if (prohibited(codepoint)) {
                throw new IllegalArgumentException("Prohibited character at position " + i);
            }

            // 4. Check bidi
            final byte directionality = Character.getDirectionality(codepoint);
            final boolean isRandALcat = directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT
                    || directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
            containsRandALCat |= isRandALcat;
            containsLCat |= directionality == Character.DIRECTIONALITY_LEFT_TO_RIGHT;
            initialRandALCat |= i == 0 && isRandALcat;

            if (!allowUnassigned && !Character.isDefined(codepoint)) {
                throw new IllegalArgumentException("Character at position " + i + " is unassigned");
            }

            i += Character.charCount(codepoint);

            // Once i has run past the end, codepoint was the last character of the string.
            if (initialRandALCat && i >= normalized.length() && !isRandALcat) {
                throw new IllegalArgumentException("First character is RandALCat, but last character is not");
            }
        }

        if (containsRandALCat && containsLCat) {
            throw new IllegalArgumentException("Contains both RandALCat characters and LCat characters");
        }
        // RFC 3454, Section 6: a string containing any RandALCat character must also START with one.
        // The in-loop check above only covers the "starts with RandALCat, ends with something else" half.
        if (containsRandALCat && !initialRandALCat) {
            throw new IllegalArgumentException("Contains RandALCat characters, but first character is not RandALCat");
        }
        return normalized;
    }

    /**
     * Return true if the given {@code codepoint} is a prohibited character as defined by
     * <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013, Section 2.3</a>.
     *
     * @param codepoint the Unicode code point to test
     * @return {@code true} if any of the prohibition tables matches the code point
     */
    static boolean prohibited(final int codepoint) {
        // The code point is passed as an int everywhere: narrowing it to char would make
        // supplementary code points alias BMP characters (e.g. U+10000 -> U+0000).
        return nonAsciiSpace(codepoint)
                || asciiControl(codepoint)
                || nonAsciiControl(codepoint)
                || privateUse(codepoint)
                || nonCharacterCodePoint(codepoint)
                || surrogateCodePoint(codepoint)
                || inappropriateForPlainText(codepoint)
                || inappropriateForCanonical(codepoint)
                || changeDisplayProperties(codepoint)
                || tagging(codepoint);
    }

    /**
     * Return true if the given {@code codepoint} is a tagging character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.9">RFC 3454, Appendix C.9</a>.
     */
    private static boolean tagging(final int codepoint) {
        return codepoint == 0xE0001
                || 0xE0020 <= codepoint && codepoint <= 0xE007F;
    }

    /**
     * Return true if the given {@code codepoint} is change display properties or deprecated characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.8">RFC 3454, Appendix C.8</a>.
     */
    private static boolean changeDisplayProperties(final int codepoint) {
        return codepoint == 0x0340
                || codepoint == 0x0341
                || codepoint == 0x200E
                || codepoint == 0x200F
                || 0x202A <= codepoint && codepoint <= 0x202E
                || 0x206A <= codepoint && codepoint <= 0x206F;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for canonical representation characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.7">RFC 3454, Appendix C.7</a>.
     */
    private static boolean inappropriateForCanonical(final int codepoint) {
        return 0x2FF0 <= codepoint && codepoint <= 0x2FFB;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for plain text characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.6">RFC 3454, Appendix C.6</a>.
     */
    private static boolean inappropriateForPlainText(final int codepoint) {
        return 0xFFF9 <= codepoint && codepoint <= 0xFFFD;
    }

    /**
     * Return true if the given {@code codepoint} is a surrogate code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.5">RFC 3454, Appendix C.5</a>.
     */
    private static boolean surrogateCodePoint(final int codepoint) {
        return 0xD800 <= codepoint && codepoint <= 0xDFFF;
    }

    /**
     * Return true if the given {@code codepoint} is a non-character code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.4">RFC 3454, Appendix C.4</a>.
     */
    private static boolean nonCharacterCodePoint(final int codepoint) {
        if (0xFDD0 <= codepoint && codepoint <= 0xFDEF) {
            return true;
        }
        // The last two code points (xFFFE, xFFFF) of each of the 17 planes, U+FFFE through U+10FFFF.
        return 0 <= codepoint && codepoint <= 0x10FFFF && (codepoint & 0xFFFE) == 0xFFFE;
    }

    /**
     * Return true if the given {@code codepoint} is a private use character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.3">RFC 3454, Appendix C.3</a>.
     */
    private static boolean privateUse(final int codepoint) {
        return 0xE000 <= codepoint && codepoint <= 0xF8FF          // BMP private use area
                || 0xF0000 <= codepoint && codepoint <= 0xFFFFD    // plane 15
                || 0x100000 <= codepoint && codepoint <= 0x10FFFD; // plane 16
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII control character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.2">RFC 3454, Appendix C.2.2</a>.
     */
    private static boolean nonAsciiControl(final int codepoint) {
        return 0x0080 <= codepoint && codepoint <= 0x009F
                || codepoint == 0x06DD
                || codepoint == 0x070F
                || codepoint == 0x180E
                || codepoint == 0x200C
                || codepoint == 0x200D
                || codepoint == 0x2028
                || codepoint == 0x2029
                || 0x2060 <= codepoint && codepoint <= 0x2063
                || 0x206A <= codepoint && codepoint <= 0x206F
                || codepoint == 0xFEFF
                || 0xFFF9 <= codepoint && codepoint <= 0xFFFC
                || 0x1D173 <= codepoint && codepoint <= 0x1D17A;
    }

    /**
     * Return true if the given {@code codepoint} is an ASCII control character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.1">RFC 3454, Appendix C.2.1</a>.
     */
    private static boolean asciiControl(final int codepoint) {
        return 0x0000 <= codepoint && codepoint <= 0x001F
                || codepoint == 0x007F;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII space character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.1.2">RFC 3454, Appendix C.1.2</a>.
     */
    private static boolean nonAsciiSpace(final int codepoint) {
        return codepoint == 0x00A0
                || codepoint == 0x1680
                || 0x2000 <= codepoint && codepoint <= 0x200B
                || codepoint == 0x202F
                || codepoint == 0x205F
                || codepoint == 0x3000;
    }

    /**
     * Return true if the given {@code ch} is a "commonly mapped to nothing" character as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-B.1">RFC 3454, Appendix B.1</a>.
     */
    private static boolean mappedToNothing(final char ch) {
        return ch == '\u00AD'
                || ch == '\u034F'
                || ch == '\u1806'
                || ch == '\u180B'
                || ch == '\u180C'
                || ch == '\u180D'
                || ch == '\u200B'
                || ch == '\u200C'
                || ch == '\u200D'
                || ch == '\u2060'
                || '\uFE00' <= ch && ch <= '\uFE0F'
                || ch == '\uFEFF';
    }

    private SaslPrep() {
    }
}