/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
import static java.lang.Character.LOWERCASE_LETTER;
import static java.lang.Character.OTHER_PUNCTUATION;
import static java.lang.Character.SPACE_SEPARATOR;
import static java.lang.Character.UPPERCASE_LETTER;

import java.util.HashMap;
import java.util.Locale;

// JavaScript date parser. This class first tries to parse a date string according to the extended ISO 8601 format specified in ES5 15.9.1.15. If that fails, it falls back to legacy mode in which it accepts a range of different formats.
//
// This class is neither thread-safe nor reusable. Calling the parse() method more than once will yield undefined results.

/**
 * JavaScript date parser. This class first tries to parse a date string
 * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
 * If that fails, it falls back to legacy mode in which it accepts a range
 * of different formats.
 *
 * <p>This class is neither thread-safe nor reusable. Calling the
 * {@code parse()} method more than once will yield undefined results.</p>
 */
public class DateParser {

    /** Constant for index position of parsed year value. */
    public static final int YEAR = 0;
    /** Constant for index position of parsed month value. */
    public static final int MONTH = 1;
    /** Constant for index position of parsed day value. */
    public static final int DAY = 2;
    /** Constant for index position of parsed hour value. */
    public static final int HOUR = 3;
    /** Constant for index position of parsed minute value. */
    public static final int MINUTE = 4;
    /** Constant for index position of parsed second value. */
    public static final int SECOND = 5;
    /** Constant for index position of parsed millisecond value. */
    public static final int MILLISECOND = 6;
    /** Constant for index position of parsed time zone offset value. */
    public static final int TIMEZONE = 7;

    /** Kinds of lexical tokens produced by {@link #next()}. */
    private enum Token {
        UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
    }

    /** The input being parsed and its cached length. */
    private final String string;
    private final int length;
    /** Parsed values indexed by the field constants above; {@code null} means "not set". */
    private final Integer[] fields;
    /** Index of the next unread character in {@link #string}. */
    private int pos = 0;
    /** Most recently read token, or {@code null} if nothing has been read yet. */
    private Token token;
    /** Number of characters consumed by the most recent token. */
    private int tokenLength;
    /** Value of the most recent NAME token; {@code null} if the name was not recognized. */
    private Name nameValue;
    /** Value of the most recent NUMBER token, or +1/-1 for a SIGN token. */
    private int numValue;
    /** Next field to be filled while parsing in ES5 (strict) mode. */
    private int currentField = YEAR;
    /** Sign (+1/-1) of an ES5 extended year, or 0 if no sign was seen. */
    private int yearSign = 0;
    /** Whether the month was given by name; affects day/month/year ordering in legacy mode. */
    private boolean namedMonth = false;

    /** Known names, keyed by their first (up to) three lower-case characters. */
    private static final HashMap<String,Name> names = new HashMap<>();

    static {
        addName("monday", Name.DAY_OF_WEEK, 0);
        addName("tuesday", Name.DAY_OF_WEEK, 0);
        addName("wednesday", Name.DAY_OF_WEEK, 0);
        addName("thursday", Name.DAY_OF_WEEK, 0);
        addName("friday", Name.DAY_OF_WEEK, 0);
        addName("saturday", Name.DAY_OF_WEEK, 0);
        addName("sunday", Name.DAY_OF_WEEK, 0);
        addName("january", Name.MONTH_NAME, 1);
        addName("february", Name.MONTH_NAME, 2);
        addName("march", Name.MONTH_NAME, 3);
        addName("april", Name.MONTH_NAME, 4);
        addName("may", Name.MONTH_NAME, 5);
        addName("june", Name.MONTH_NAME, 6);
        addName("july", Name.MONTH_NAME, 7);
        addName("august", Name.MONTH_NAME, 8);
        addName("september", Name.MONTH_NAME, 9);
        addName("october", Name.MONTH_NAME, 10);
        addName("november", Name.MONTH_NAME, 11);
        addName("december", Name.MONTH_NAME, 12);
        addName("am", Name.AM_PM, 0);
        addName("pm", Name.AM_PM, 12);
        // time zone offsets are expressed in minutes
        addName("z", Name.TIMEZONE_ID, 0);
        addName("gmt", Name.TIMEZONE_ID, 0);
        addName("ut", Name.TIMEZONE_ID, 0);
        addName("utc", Name.TIMEZONE_ID, 0);
        addName("est", Name.TIMEZONE_ID, -5 * 60);
        addName("edt", Name.TIMEZONE_ID, -4 * 60);
        addName("cst", Name.TIMEZONE_ID, -6 * 60);
        addName("cdt", Name.TIMEZONE_ID, -5 * 60);
        addName("mst", Name.TIMEZONE_ID, -7 * 60);
        addName("mdt", Name.TIMEZONE_ID, -6 * 60);
        addName("pst", Name.TIMEZONE_ID, -8 * 60);
        addName("pdt", Name.TIMEZONE_ID, -7 * 60);
        addName("t", Name.TIME_SEPARATOR, 0);
    }

    /**
     * Construct a new {@code DateParser} instance for parsing the given string.
     * @param string the string to be parsed
     */
    public DateParser(final String string) {
        this.string = string;
        this.length = string.length();
        this.fields = new Integer[TIMEZONE + 1];
    }

    /**
     * Try parsing the given string as date according to the extended ISO 8601 format
     * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
     * This method returns {@code true} if the string could be parsed.
     * @return true if the string could be parsed as date
     */
    public boolean parse() {
        return parseEcmaDate() || parseLegacyDate();
    }

    /**
     * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
     * The date string must conform to the following format:
     *
     * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
     *
     * <p>If the string does not contain a time zone offset, the {@code TIMEZONE} field
     * is set to {@code 0} (GMT).</p>
     * @return true if string represents a valid ES5 date string.
     */
    public boolean parseEcmaDate() {

        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (currentField == YEAR && yearSign != 0) {
                        // 15.9.1.15.1 Extended year must have six digits
                        if (tokenLength != 6) {
                            return false;
                        }
                        numValue *= yearSign;
                    } else if (!checkEcmaField(currentField, numValue)) {
                        return false;
                    }
                    if (!skipEcmaDelimiter()) {
                        return false;
                    }
                    if (currentField < TIMEZONE) {
                        set(currentField++, numValue);
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.TIME_SEPARATOR:
                            if (currentField == YEAR || currentField > HOUR) {
                                return false;
                            }
                            currentField = HOUR;
                            break;
                        case Name.TIMEZONE_ID:
                            // only the literal 'Z' is a valid zone name in strict mode
                            if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        default:
                            return false;
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (currentField == YEAR) {
                        yearSign = numValue;
                    } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
                        // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(true);
    }

    /**
     * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
     *
     * <p>Numbers separated by {@code ':'} are treated as time values, optionally followed by a
     * millisecond value separated by {@code '.'}. Other number values are treated as date values.
     * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
     *
     * <p>English month names and selected time zone names as well as AM/PM markers are recognized
     * and handled properly. Additionally, numeric time zone offsets such as {@code (+|-)hh:mm} or
     * {@code (+|-)hhmm} are recognized. If the string does not contain a time zone offset
     * the {@code TIMEZONE} field is left undefined, meaning the local time zone should be applied.</p>
     *
     * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
     * All other text causes parsing to fail.</p>
     *
     * @return true if the string could be parsed
     */
    public boolean parseLegacyDate() {

        if (yearSign != 0 || currentField > DAY) {
            // we don't support signed years in legacy mode
            return false;
        }
        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (skipDelimiter(':')) {
                        // A number followed by ':' is parsed as time
                        if (!setTimeField(numValue)) {
                            return false;
                        }
                        // consume remaining time tokens
                        do {
                            token = next();
                            if (token != Token.NUMBER || !setTimeField(numValue)) {
                                return false;
                            }
                        } while (skipDelimiter(isSet(SECOND) ? '.' : ':'));

                    } else {
                        // Parse as date token
                        if (!setDateField(numValue)) {
                            return false;
                        }
                        skipDelimiter('-');
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.AM_PM:
                            if (!setAmPm(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.MONTH_NAME:
                            if (!setMonth(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.TIMEZONE_ID:
                            if (!setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        case Name.TIME_SEPARATOR:
                            return false;
                        default:
                            // weekday names are accepted but carry no information
                            break;
                    }
                    if (nameValue.type != Name.TIMEZONE_ID) {
                        skipDelimiter('-');
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (!setTimezone(readTimeZoneOffset(), true)) {
                        return false;
                    }
                    break;

                case PARENTHESIS:
                    if (!skipParentheses()) {
                        return false;
                    }
                    break;

                case SEPARATOR:
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(false);
    }

    /**
     * Get the parsed date and time fields as an array of {@code Integers}.
     *
     * <p>If parsing was successful, all fields are guaranteed to be set except for the
     * {@code TIMEZONE} field which may be {@code null}, meaning that local time zone
     * offset should be applied. After a successful parse the {@code MONTH} value is
     * 0-based. The returned array is the parser's internal array, not a copy.</p>
     *
     * @return the parsed date fields
     */
    public Integer[] getDateFields() {
        return fields;
    }

    /** Returns true if the given field has been assigned a value. */
    private boolean isSet(final int field) {
        return fields[field] != null;
    }

    /** Returns the value of the given field, or {@code null} if it is not set. */
    private Integer get(final int field) {
        return fields[field];
    }

    /** Assigns a value to the given field. */
    private void set(final int field, final int value) {
        fields[field] = value;
    }

    /** Returns the next unread character without consuming it, or -1 at end of input. */
    private int peek() {
        return pos < length ? string.charAt(pos) : -1;
    }

    // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
    private boolean skipNumberDelimiter(final char c) {
        if (pos < length - 1 && string.charAt(pos) == c
                && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /** Consumes the next character if it equals {@code c}; returns whether it did. */
    private boolean skipDelimiter(final char c) {
        if (pos < length && string.charAt(pos) == c) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /**
     * Reads the next token starting at {@link #pos}, updating {@link #tokenLength} and,
     * depending on the token kind, {@link #numValue} or {@link #nameValue}.
     */
    private Token next() {
        if (pos >= length) {
            tokenLength = 0;
            return Token.END;
        }

        final char c = string.charAt(pos);

        if (c > 0x80) {
            tokenLength = 1;
            pos++;
            return Token.UNKNOWN; // We only deal with ASCII here
        }

        final int type = Character.getType(c);
        switch (type) {
            case DECIMAL_DIGIT_NUMBER:
                numValue = readNumber(6);
                return Token.NUMBER;
            case SPACE_SEPARATOR:
            case OTHER_PUNCTUATION:
                tokenLength = 1;
                pos++;
                return Token.SEPARATOR;
            case UPPERCASE_LETTER:
            case LOWERCASE_LETTER:
                nameValue = readName();
                return Token.NAME;
            default:
                tokenLength = 1;
                pos++;
                switch (c) {
                    case '(':
                        return Token.PARENTHESIS;
                    case '-':
                    case '+':
                        numValue = c == '-' ? -1 : 1;
                        return Token.SIGN;
                    default:
                        return Token.UNKNOWN;
                }
        }
    }

    /** Range-checks a time field value in legacy mode; date fields are not validated here. */
    private static boolean checkLegacyField(final int field, final int value) {
        switch (field) {
            case HOUR:
                return isHour(value);
            case MINUTE:
            case SECOND:
                return isMinuteOrSecond(value);
            case MILLISECOND:
                return isMillisecond(value);
            default:
                // skip validation on other legacy fields as we don't know what's what
                return true;
        }
    }

    /** Checks digit count (of the current token) and value range for a field in ES5 mode. */
    private boolean checkEcmaField(final int field, final int value) {
        switch (field) {
            case YEAR:
                return tokenLength == 4;
            case MONTH:
                return tokenLength == 2 && isMonth(value);
            case DAY:
                return tokenLength == 2 && isDay(value);
            case HOUR:
                return tokenLength == 2 && isHour(value);
            case MINUTE:
            case SECOND:
                return tokenLength == 2 && isMinuteOrSecond(value);
            case MILLISECOND:
                // we allow millisecond to be less than 3 digits
                return tokenLength < 4 && isMillisecond(value);
            default:
                return true;
        }
    }

    /**
     * Verifies that what follows the current field is acceptable in ES5 mode,
     * consuming a field delimiter when one is present.
     */
    private boolean skipEcmaDelimiter() {
        switch (currentField) {
            case YEAR:
            case MONTH:
                return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
            case DAY:
                return peek() == 'T' || peek() == -1;
            case HOUR:
            case MINUTE:
                return skipNumberDelimiter(':') || endOfTime();
            case SECOND:
                return skipNumberDelimiter('.') || endOfTime();
            default:
                return true;
        }
    }

    /** Returns true if the next character may legally follow a time value in ES5 mode. */
    private boolean endOfTime() {
        final int c = peek();
        return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
    }

    private static boolean isAsciiLetter(final char ch) {
        return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
    }

    private static boolean isAsciiDigit(final char ch) {
        return '0' <= ch && ch <= '9';
    }

    /**
     * Reads up to {@code maxDigits} ASCII digits and returns their decimal value.
     * Sets {@link #tokenLength} to the number of digits consumed (possibly 0).
     */
    private int readNumber(final int maxDigits) {
        final int start = pos;
        int n = 0;
        final int max = Math.min(length, pos + maxDigits);
        while (pos < max && isAsciiDigit(string.charAt(pos))) {
            n = n * 10 + string.charAt(pos++) - '0';
        }
        tokenLength = pos - start;
        return n;
    }

    /**
     * Reads a run of ASCII letters and looks it up in {@link #names}.
     * @return the matching name, or {@code null} if the letters are neither
     *         a known name nor a prefix of one
     */
    private Name readName() {
        final int start = pos;
        final int limit = Math.min(pos + 3, length);

        // first read up to the key length
        while (pos < limit && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }
        final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
        final Name name = names.get(key);
        // then advance to end of name
        while (pos < length && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }

        tokenLength = pos - start;
        // make sure we have the full name or a prefix
        if (name != null && name.matches(string, start, tokenLength)) {
            return name;
        }
        return null;
    }

    /**
     * Reads a numeric {@code hh[:]mm} offset following a sign character that has
     * already been consumed, and returns it as a signed number of minutes.
     */
    private int readTimeZoneOffset() {
        final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
        int offset = readNumber(2);
        skipDelimiter(':');
        offset = offset * 60 + readNumber(2);
        return sign * offset;
    }

    /**
     * Skips input up to and including the parenthesis matching an already consumed
     * {@code '('}, honoring nesting. Stops at end of input if no match is found.
     * @return always {@code true}
     */
    private boolean skipParentheses() {
        int parenCount = 1;
        while (pos < length && parenCount != 0) {
            final char c = string.charAt(pos++);
            if (c == '(') {
                parenCount++;
            } else if (c == ')') {
                parenCount--;
            }
        }
        return true;
    }

    /** Default for an unset field: 1 for month and day, 0 for everything else. */
    private static int getDefaultValue(final int field) {
        switch (field) {
            case MONTH:
            case DAY:
                return 1;
            default:
                return 0;
        }
    }

    private static boolean isDay(final int n) {
        return 1 <= n && n <= 31;
    }

    private static boolean isMonth(final int n) {
        return 1 <= n && n <= 12;
    }

    private static boolean isHour(final int n) {
        return 0 <= n && n <= 24;
    }

    private static boolean isMinuteOrSecond(final int n) {
        return 0 <= n && n < 60;
    }

    private static boolean isMillisecond(final int n) {
        return 0 <= n && n < 1000;
    }

    /** Sets the month from a month name; fails if a month value is already present. */
    private boolean setMonth(final int m) {
        if (!isSet(MONTH)) {
            namedMonth = true;
            set(MONTH, m);
            return true;
        }
        return false;
    }

    /** Stores {@code n} in the first unset date field (year, month, day); fails if all are set. */
    private boolean setDateField(final int n) {
        for (int field = YEAR; field != HOUR; field++) {
            if (!isSet(field)) {
                // no validation on legacy date fields
                set(field, n);
                return true;
            }
        }
        return false;
    }

    /**
     * Stores {@code n} in the first unset time field (hour through millisecond).
     * Fails if the value is out of range for that field or all time fields are set.
     */
    private boolean setTimeField(final int n) {
        for (int field = HOUR; field != TIMEZONE; field++) {
            if (!isSet(field)) {
                if (checkLegacyField(field, n)) {
                    set(field, n);
                    return true;
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Sets the time zone offset in minutes. A numeric offset may override a
     * previously set offset of zero (as in "GMT+0100"); otherwise the field
     * must not be set yet.
     */
    private boolean setTimezone(final int offset, final boolean asNumericOffset) {
        if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
            set(TIMEZONE, offset);
            return true;
        }
        return false;
    }

    /**
     * Applies an AM/PM marker to an already parsed hour value.
     *
     * <p>Hours in the range 0..12 are interpreted on the 12-hour clock, where 12
     * wraps around to 0: "12 AM" is hour 0 (midnight) and "12 PM" is hour 12 (noon).
     * Hours greater than 12 are left untouched.</p>
     *
     * @param offset 0 for AM, 12 for PM
     * @return false if no hour has been parsed yet, true otherwise
     */
    private boolean setAmPm(final int offset) {
        if (!isSet(HOUR)) {
            return false;
        }
        final int hour = get(HOUR);
        if (hour >= 0 && hour <= 12) {
            // hour % 12 maps 12 to 0 so that 12am -> 0 and 12pm -> 12
            set(HOUR, hour % 12 + offset);
        }
        return true;
    }

    /**
     * Validates the parsed fields, fills in defaults and normalizes the result.
     * @param strict true when parsing in ES5 mode, false for legacy mode
     * @return true if the fields form an acceptable date
     */
    private boolean patchResult(final boolean strict) {
        // sanity checks - make sure we have something
        if (!isSet(YEAR) && !isSet(HOUR)) {
            return false;
        }
        if (isSet(HOUR) && !isSet(MINUTE)) {
            return false;
        }
        // fill in default values for unset fields except timezone
        for (int field = YEAR; field <= TIMEZONE; field++) {
            if (get(field) == null) {
                if (field == TIMEZONE && !strict) {
                    // We only use UTC as default timezone for dates parsed complying with
                    // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
                    // and local timezone is used.
                    continue;
                }
                final int value = getDefaultValue(field);
                set(field, value);
            }
        }

        if (!strict) {
            // swap year, month, and day if it looks like the right thing to do
            if (isDay(get(YEAR))) {
                final int d = get(YEAR);
                set(YEAR, get(DAY));
                if (namedMonth) {
                    // d-m-y
                    set(DAY, d);
                } else {
                    // m-d-y
                    final int d2 = get(MONTH);
                    set(MONTH, d);
                    set(DAY, d2);
                }
            }
            // sanity checks now that we know what's what
            if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
                return false;
            }

            // add 1900 or 2000 to year if it's between 0 and 100
            final int year = get(YEAR);
            if (year >= 0 && year < 100) {
                set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
            }
        } else {
            // 24 hour value is only allowed if all other time values are zero
            if (get(HOUR) == 24
                    && (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
                return false;
            }
        }

        // set month to 0-based
        set(MONTH, get(MONTH) - 1);
        return true;
    }

    /** Registers a name in the lookup table under its (up to) three-character key. */
    private static void addName(final String str, final int type, final int value) {
        final Name name = new Name(str, type, value);
        names.put(name.key, name);
    }

    /** A recognized word (weekday, month, AM/PM marker, time zone, or time separator). */
    private static class Name {
        final String name;
        final String key;
        final int value;
        final int type;

        static final int DAY_OF_WEEK    = -1;
        static final int MONTH_NAME     = 0;
        static final int AM_PM          = 1;
        static final int TIMEZONE_ID    = 2;
        static final int TIME_SEPARATOR = 3;

        Name(final String name, final int type, final int value) {
            assert name != null;
            assert name.equals(name.toLowerCase(Locale.ENGLISH));

            this.name = name;
            // use first three characters as lookup key
            this.key = name.substring(0, Math.min(3, name.length()));
            this.type = type;
            this.value = value;
        }

        /**
         * Returns true if the {@code len} characters of {@code str} starting at
         * {@code offset} equal, ignoring case, the first {@code len} characters of this name.
         */
        public boolean matches(final String str, final int offset, final int len) {
            return name.regionMatches(true, 0, str, offset, len);
        }

        @Override
        public String toString() {
            return name;
        }
    }
}