/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.geo;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;

/*
  We accept either a whole type: Feature, like this:

    { "type": "Feature",
      "geometry": {
         "type": "Polygon",
         "coordinates": [
           [ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
             [100.0, 1.0], [100.0, 0.0] ]
           ]
       },
       "properties": {
         "prop0": "value0",
         "prop1": {"this": "that"}
         }
       }

   Or the inner object with type: Multi/Polygon.

   Or a type: FeatureCollection, if it has only one Feature which is a Polygon or MultiPolygon.

   type: MultiPolygon (union of polygons) is also accepted.
*/

Does minimal parsing of a GeoJSON object, to extract either Polygon or MultiPolygon, either directly as the top-level type, or if the top-level type is Feature, as the geometry of that feature.
/** Does minimal parsing of a GeoJSON object, to extract either Polygon or MultiPolygon, either directly as the top-level type, or if * the top-level type is Feature, as the geometry of that feature. */
@SuppressWarnings("unchecked") class SimpleGeoJSONPolygonParser { final String input; private int upto; private String polyType; private List<Object> coordinates; public SimpleGeoJSONPolygonParser(String input) { this.input = input; } public Polygon[] parse() throws ParseException { // parse entire object parseObject(""); // make sure there's nothing left: readEnd(); // The order of JSON object keys (type, geometry, coordinates in our case) can be arbitrary, so we wait until we are done parsing to // put the pieces together here: if (coordinates == null) { throw newParseException("did not see any polygon coordinates"); } if (polyType == null) { throw newParseException("did not see type: Polygon or MultiPolygon"); } if (polyType.equals("Polygon")) { return new Polygon[] {parsePolygon(coordinates)}; } else { List<Polygon> polygons = new ArrayList<>(); for(int i=0;i<coordinates.size();i++) { Object o = coordinates.get(i); if (o instanceof List == false) { throw newParseException("elements of coordinates array should be an array, but got: " + o.getClass()); } polygons.add(parsePolygon((List<Object>) o)); } return polygons.toArray(new Polygon[polygons.size()]); } }
path is the "address" by keys of where we are, e.g. geometry.coordinates
/** path is the "address" by keys of where we are, e.g. geometry.coordinates */
private void parseObject(String path) throws ParseException { scan('{'); boolean first = true; while (true) { char ch = peek(); if (ch == '}') { break; } else if (first == false) { if (ch == ',') { // ok upto++; ch = peek(); if (ch == '}') { break; } } else { throw newParseException("expected , but got " + ch); } } first = false; int uptoStart = upto; String key = parseString(); if (path.equals("crs.properties") && key.equals("href")) { upto = uptoStart; throw newParseException("cannot handle linked crs"); } scan(':'); Object o; ch = peek(); uptoStart = upto; if (ch == '[') { String newPath; if (path.length() == 0) { newPath = key; } else { newPath = path + "." + key; } o = parseArray(newPath); } else if (ch == '{') { String newPath; if (path.length() == 0) { newPath = key; } else { newPath = path + "." + key; } parseObject(newPath); o = null; } else if (ch == '"') { o = parseString(); } else if (ch == 't') { scan("true"); o = Boolean.TRUE; } else if (ch == 'f') { scan("false"); o = Boolean.FALSE; } else if (ch == 'n') { scan("null"); o = null; } else if (ch == '-' || ch == '.' 
|| (ch >= '0' && ch <= '9')) { o = parseNumber(); } else if (ch == '}') { break; } else { throw newParseException("expected array, object, string or literal value, but got: " + ch); } if (path.equals("crs.properties") && key.equals("name")) { if (o instanceof String == false) { upto = uptoStart; throw newParseException("crs.properties.name should be a string, but saw: " + o); } String crs = (String) o; if (crs.startsWith("urn:ogc:def:crs:OGC") == false || crs.endsWith(":CRS84") == false) { upto = uptoStart; throw newParseException("crs must be CRS84 from OGC, but saw: " + o); } } if (key.equals("type") && path.startsWith("crs") == false) { if (o instanceof String == false) { upto = uptoStart; throw newParseException("type should be a string, but got: " + o); } String type = (String) o; if (type.equals("Polygon") && isValidGeometryPath(path)) { polyType = "Polygon"; } else if (type.equals("MultiPolygon") && isValidGeometryPath(path)) { polyType = "MultiPolygon"; } else if ((type.equals("FeatureCollection") || type.equals("Feature")) && (path.equals("features.[]") || path.equals(""))) { // OK, we recurse } else { upto = uptoStart; throw newParseException("can only handle type FeatureCollection (if it has a single polygon geometry), Feature, Polygon or MultiPolygon, but got " + type); } } else if (key.equals("coordinates") && isValidGeometryPath(path)) { if (o instanceof List == false) { upto = uptoStart; throw newParseException("coordinates should be an array, but got: " + o.getClass()); } if (coordinates != null) { upto = uptoStart; throw newParseException("only one Polygon or MultiPolygon is supported"); } coordinates = (List<Object>) o; } } scan('}'); }
Returns true if the object path is a valid location to see a Multi/Polygon geometry
/** Returns true if the object path is a valid location to see a Multi/Polygon geometry */
private boolean isValidGeometryPath(String path) { return path.equals("") || path.equals("geometry") || path.equals("features.[].geometry"); } private Polygon parsePolygon(List<Object> coordinates) throws ParseException { List<Polygon> holes = new ArrayList<>(); Object o = coordinates.get(0); if (o instanceof List == false) { throw newParseException("first element of polygon array must be an array [[lat, lon], [lat, lon] ...] but got: " + o); } double[][] polyPoints = parsePoints((List<Object>) o); for(int i=1;i<coordinates.size();i++) { o = coordinates.get(i); if (o instanceof List == false) { throw newParseException("elements of coordinates array must be an array [[lat, lon], [lat, lon] ...] but got: " + o); } double[][] holePoints = parsePoints((List<Object>) o); holes.add(new Polygon(holePoints[0], holePoints[1])); } return new Polygon(polyPoints[0], polyPoints[1], holes.toArray(new Polygon[holes.size()])); }
Parses [[lat, lon], [lat, lon] ...] into 2d double array
/** Parses [[lat, lon], [lat, lon] ...] into 2d double array */
private double[][] parsePoints(List<Object> o) throws ParseException { double[] lats = new double[o.size()]; double[] lons = new double[o.size()]; for(int i=0;i<o.size();i++) { Object point = o.get(i); if (point instanceof List == false) { throw newParseException("elements of coordinates array must [lat, lon] array, but got: " + point); } List<Object> pointList = (List<Object>) point; if (pointList.size() != 2) { throw newParseException("elements of coordinates array must [lat, lon] array, but got wrong element count: " + pointList); } if (pointList.get(0) instanceof Double == false) { throw newParseException("elements of coordinates array must [lat, lon] array, but first element is not a Double: " + pointList.get(0)); } if (pointList.get(1) instanceof Double == false) { throw newParseException("elements of coordinates array must [lat, lon] array, but second element is not a Double: " + pointList.get(1)); } // lon, lat ordering in GeoJSON! lons[i] = ((Double) pointList.get(0)).doubleValue(); lats[i] = ((Double) pointList.get(1)).doubleValue(); } return new double[][] {lats, lons}; } private List<Object> parseArray(String path) throws ParseException { List<Object> result = new ArrayList<>(); scan('['); while (upto < input.length()) { char ch = peek(); if (ch == ']') { scan(']'); return result; } if (result.size() > 0) { if (ch != ',') { throw newParseException("expected ',' separating list items, but got '" + ch + "'"); } // skip the , upto++; if (upto == input.length()) { throw newParseException("hit EOF while parsing array"); } ch = peek(); } Object o; if (ch == '[') { o = parseArray(path + ".[]"); } else if (ch == '{') { // This is only used when parsing the "features" in type: FeatureCollection parseObject(path + ".[]"); o = null; } else if (ch == '-' || ch == '.' 
|| (ch >= '0' && ch <= '9')) { o = parseNumber(); } else if (ch == '"') { o = parseString(); } else { throw newParseException("expected another array or number while parsing array, not '" + ch + "'"); } result.add(o); } throw newParseException("hit EOF while reading array"); } private Number parseNumber() throws ParseException { StringBuilder b = new StringBuilder(); int uptoStart = upto; while (upto < input.length()) { char ch = input.charAt(upto); if (ch == '-' || ch == '.' || (ch >= '0' && ch <= '9') || ch == 'e' || ch == 'E') { upto++; b.append(ch); } else { break; } } // we only handle doubles try { return Double.parseDouble(b.toString()); } catch (NumberFormatException nfe) { upto = uptoStart; throw newParseException("could not parse number as double"); } } private String parseString() throws ParseException { scan('"'); StringBuilder b = new StringBuilder(); while (upto < input.length()) { char ch = input.charAt(upto); if (ch == '"') { upto++; return b.toString(); } if (ch == '\\') { // an escaped character upto++; if (upto == input.length()) { throw newParseException("hit EOF inside string literal"); } ch = input.charAt(upto); if (ch == 'u') { // 4 hex digit unicode BMP escape upto++; if (upto + 4 > input.length()) { throw newParseException("hit EOF inside string literal"); } b.append(Integer.parseInt(input.substring(upto, upto+4), 16)); } else if (ch == '\\') { b.append('\\'); upto++; } else { // TODO: allow \n, \t, etc.??? throw newParseException("unsupported string escape character \\" + ch); } } else { b.append(ch); upto++; } } throw newParseException("hit EOF inside string literal"); } private char peek() throws ParseException { while (upto < input.length()) { char ch = input.charAt(upto); if (isJSONWhitespace(ch)) { upto++; continue; } return ch; } throw newParseException("unexpected EOF"); }
Scans across whitespace and consumes the expected character, or throws ParseException if the character is wrong
/** Scans across whitespace and consumes the expected character, or throws {@code ParseException} if the character is wrong */
private void scan(char expected) throws ParseException { while (upto < input.length()) { char ch = input.charAt(upto); if (isJSONWhitespace(ch)) { upto++; continue; } if (ch != expected) { throw newParseException("expected '" + expected + "' but got '" + ch + "'"); } upto++; return; } throw newParseException("expected '" + expected + "' but got EOF"); } private void readEnd() throws ParseException { while (upto < input.length()) { char ch = input.charAt(upto); if (isJSONWhitespace(ch) == false) { throw newParseException("unexpected character '" + ch + "' after end of GeoJSON object"); } upto++; } }
Scans the expected string, or throws ParseException
/** Scans the expected string, or throws {@code ParseException} */
private void scan(String expected) throws ParseException { if (upto + expected.length() > input.length()) { throw newParseException("expected \"" + expected + "\" but hit EOF"); } String subString = input.substring(upto, upto+expected.length()); if (subString.equals(expected) == false) { throw newParseException("expected \"" + expected + "\" but got \"" + subString + "\""); } upto += expected.length(); } private static boolean isJSONWhitespace(char ch) { // JSON doesn't accept allow unicode whitespace? return ch == 0x20 || // space ch == 0x09 || // tab ch == 0x0a || // line feed ch == 0x0d; // newline }
/**
 * Builds (but does not throw) a {@code ParseException} whose message carries the offset and up
 * to 50 characters of input leading to it. When calling this, upto should be at the position of
 * the incorrect character!
 */
private ParseException newParseException(String details) throws ParseException {
  final int fragmentEnd = Math.min(input.length(), upto + 1);

  // show everything so far when we are near the start, otherwise only the last 50 characters
  final String fragment =
      upto >= 50
          ? "..." + input.substring(upto - 50, fragmentEnd)
          : input.substring(0, fragmentEnd);

  String message =
      details + " at character offset " + upto + "; fragment leading to this:\n" + fragment;
  return new ParseException(message, upto);
}
}