/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.index.sasi.plan;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;

import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.config.ColumnDefinition.Kind;
import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.db.filter.RowFilter;
import org.apache.cassandra.db.rows.Row;
import org.apache.cassandra.db.rows.Unfiltered;
import org.apache.cassandra.index.sasi.conf.ColumnIndex;
import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
import org.apache.cassandra.index.sasi.disk.Token;
import org.apache.cassandra.index.sasi.plan.Expression.Op;
import org.apache.cassandra.index.sasi.utils.RangeIntersectionIterator;
import org.apache.cassandra.index.sasi.utils.RangeIterator;
import org.apache.cassandra.index.sasi.utils.RangeUnionIterator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.*;
import org.apache.cassandra.utils.FBUtilities;

@SuppressWarnings("resource")
public class Operation extends RangeIterator<Long, Token>
{
    public enum OperationType
    {
        AND, OR;

        public boolean apply(boolean a, boolean b)
        {
            switch (this)
            {
                case OR:
                    return a | b;

                case AND:
                    return a & b;

                default:
                    throw new AssertionError();
            }
        }
    }

    private final QueryController controller;

    protected final OperationType op;
    protected final ListMultimap<ColumnDefinition, Expression> expressions;
    protected final RangeIterator<Long, Token> range;

    protected Operation left, right;

    private Operation(OperationType operation,
                      QueryController controller,
                      ListMultimap<ColumnDefinition, Expression> expressions,
                      RangeIterator<Long, Token> range,
                      Operation left, Operation right)
    {
        super(range);

        this.op = operation;
        this.controller = controller;
        this.expressions = expressions;
        this.range = range;

        this.left = left;
        this.right = right;
    }

    
Recursive "satisfies" checks based on operation and data from the lower level members using depth-first search and bubbling the results back to the top level caller. Most of the work here is done by localSatisfiedBy(Unfiltered, Row, boolean) see it's comment for details, if there are no local expressions assigned to Operation it will call satisfiedBy(Row) on it's children. Query: first_name = X AND (last_name = Y OR address = XYZ AND street = IL AND city = C) OR (state = 'CA' AND country = 'US') Row: key1: (first_name: X, last_name: Z, address: XYZ, street: IL, city: C, state: NY, country:US) #1 OR / \ #2 (first_name) AND AND (state, country) \ #3 (last_name) OR \ #4 AND (address, street, city) Evaluation of the key1 is top-down depth-first search: --- going down --- Level #1 is evaluated, OR expression has to pull results from it's children which are at level #2 and OR them together, Level #2 AND (state, country) could be be evaluated right away, AND (first_name) refers to it's "right" child from level #3 Level #3 OR (last_name) requests results from level #4 Level #4 AND (address, street, city) does logical AND between it's 3 fields, returns result back to level #3. --- bubbling up --- Level #3 computes OR between AND (address, street, city) result and it's "last_name" expression Level #2 computes AND between "first_name" and result of level #3, AND (state, country) which is already computed Level #1 does OR between results of AND (first_name) and AND (state, country) and returns final result.
Params:
  • currentCluster – The row cluster to check.
  • staticRow – The static row associated with current cluster.
  • allowMissingColumns – allow columns value to be null.
Returns:true if give Row satisfied all of the expressions in the tree, false otherwise.
/** * Recursive "satisfies" checks based on operation * and data from the lower level members using depth-first search * and bubbling the results back to the top level caller. * * Most of the work here is done by {@link #localSatisfiedBy(Unfiltered, Row, boolean)} * see it's comment for details, if there are no local expressions * assigned to Operation it will call satisfiedBy(Row) on it's children. * * Query: first_name = X AND (last_name = Y OR address = XYZ AND street = IL AND city = C) OR (state = 'CA' AND country = 'US') * Row: key1: (first_name: X, last_name: Z, address: XYZ, street: IL, city: C, state: NY, country:US) * * #1 OR * / \ * #2 (first_name) AND AND (state, country) * \ * #3 (last_name) OR * \ * #4 AND (address, street, city) * * * Evaluation of the key1 is top-down depth-first search: * * --- going down --- * Level #1 is evaluated, OR expression has to pull results from it's children which are at level #2 and OR them together, * Level #2 AND (state, country) could be be evaluated right away, AND (first_name) refers to it's "right" child from level #3 * Level #3 OR (last_name) requests results from level #4 * Level #4 AND (address, street, city) does logical AND between it's 3 fields, returns result back to level #3. * --- bubbling up --- * Level #3 computes OR between AND (address, street, city) result and it's "last_name" expression * Level #2 computes AND between "first_name" and result of level #3, AND (state, country) which is already computed * Level #1 does OR between results of AND (first_name) and AND (state, country) and returns final result. * * @param currentCluster The row cluster to check. * @param staticRow The static row associated with current cluster. * @param allowMissingColumns allow columns value to be null. * @return true if give Row satisfied all of the expressions in the tree, * false otherwise. */
public boolean satisfiedBy(Unfiltered currentCluster, Row staticRow, boolean allowMissingColumns) { boolean sideL, sideR; if (expressions == null || expressions.isEmpty()) { sideL = left != null && left.satisfiedBy(currentCluster, staticRow, allowMissingColumns); sideR = right != null && right.satisfiedBy(currentCluster, staticRow, allowMissingColumns); // one of the expressions was skipped // because it had no indexes attached if (left == null) return sideR; } else { sideL = localSatisfiedBy(currentCluster, staticRow, allowMissingColumns); // if there is no right it means that this expression // is last in the sequence, we can just return result from local expressions if (right == null) return sideL; sideR = right.satisfiedBy(currentCluster, staticRow, allowMissingColumns); } return op.apply(sideL, sideR); }
Check every expression in the analyzed list to figure out if the columns in the give row match all of the based on the operation set to the current operation node. The algorithm is as follows: for every given expression from analyzed list get corresponding column from the Row: - apply Expression.isSatisfiedBy(ByteBuffer) method to figure out if it's satisfied; - apply logical operation between boolean accumulator and current boolean result; - if result == false and node's operation is AND return right away; After all of the expressions have been evaluated return resulting accumulator variable. Example: Operation = (op: AND, columns: [first_name = p, 5 < age < 7, last_name: y]) Row = (first_name: pavel, last_name: y, age: 6, timestamp: 15) #1 get "first_name" = p (expressions) - row-get "first_name" => "pavel" - compare "pavel" against "p" => true (current) - set accumulator current => true (because this is expression #1) #2 get "last_name" = y (expressions) - row-get "last_name" => "y" - compare "y" against "y" => true (current) - set accumulator to accumulator & current => true #3 get 5 < "age" < 7 (expressions) - row-get "age" => "6" - compare 5 < 6 < 7 => true (current) - set accumulator to accumulator & current => true #4 return accumulator => true (row satisfied all of the conditions)
Params:
  • currentCluster – The row cluster to check.
  • staticRow – The static row associated with current cluster.
  • allowMissingColumns – allow columns value to be null.
Returns:true if give Row satisfied all of the analyzed expressions, false otherwise.
/** * Check every expression in the analyzed list to figure out if the * columns in the give row match all of the based on the operation * set to the current operation node. * * The algorithm is as follows: for every given expression from analyzed * list get corresponding column from the Row: * - apply {@link Expression#isSatisfiedBy(ByteBuffer)} * method to figure out if it's satisfied; * - apply logical operation between boolean accumulator and current boolean result; * - if result == false and node's operation is AND return right away; * * After all of the expressions have been evaluated return resulting accumulator variable. * * Example: * * Operation = (op: AND, columns: [first_name = p, 5 < age < 7, last_name: y]) * Row = (first_name: pavel, last_name: y, age: 6, timestamp: 15) * * #1 get "first_name" = p (expressions) * - row-get "first_name" => "pavel" * - compare "pavel" against "p" => true (current) * - set accumulator current => true (because this is expression #1) * * #2 get "last_name" = y (expressions) * - row-get "last_name" => "y" * - compare "y" against "y" => true (current) * - set accumulator to accumulator & current => true * * #3 get 5 < "age" < 7 (expressions) * - row-get "age" => "6" * - compare 5 < 6 < 7 => true (current) * - set accumulator to accumulator & current => true * * #4 return accumulator => true (row satisfied all of the conditions) * * @param currentCluster The row cluster to check. * @param staticRow The static row associated with current cluster. * @param allowMissingColumns allow columns value to be null. * @return true if give Row satisfied all of the analyzed expressions, * false otherwise. */
private boolean localSatisfiedBy(Unfiltered currentCluster, Row staticRow, boolean allowMissingColumns) { if (currentCluster == null || !currentCluster.isRow()) return false; final int now = FBUtilities.nowInSeconds(); boolean result = false; int idx = 0; for (ColumnDefinition column : expressions.keySet()) { if (column.kind == Kind.PARTITION_KEY) continue; ByteBuffer value = ColumnIndex.getValueOf(column, column.kind == Kind.STATIC ? staticRow : (Row) currentCluster, now); boolean isMissingColumn = value == null; if (!allowMissingColumns && isMissingColumn) throw new IllegalStateException("All indexed columns should be included into the column slice, missing: " + column); boolean isMatch = false; // If there is a column with multiple expressions that effectively means an OR // e.g. comment = 'x y z' could be split into 'comment' EQ 'x', 'comment' EQ 'y', 'comment' EQ 'z' // by analyzer, in situation like that we only need to check if at least one of expressions matches, // and there is no hit on the NOT_EQ (if any) which are always at the end of the filter list. // Loop always starts from the end of the list, which makes it possible to break after the last // NOT_EQ condition on first EQ/RANGE condition satisfied, instead of checking every // single expression in the column filter list. List<Expression> filters = expressions.get(column); for (int i = filters.size() - 1; i >= 0; i--) { Expression expression = filters.get(i); isMatch = !isMissingColumn && expression.isSatisfiedBy(value); if (expression.getOp() == Op.NOT_EQ) { // since this is NOT_EQ operation we have to // inverse match flag (to check against other expressions), // and break in case of negative inverse because that means // that it's a positive hit on the not-eq clause. isMatch = !isMatch; if (!isMatch) break; } // if it was a match on EQ/RANGE or column is missing else if (isMatch || isMissingColumn) break; } if (idx++ == 0) { result = isMatch; continue; } result = op.apply(result, isMatch); // exit early because we already got a single false if (op == OperationType.AND && !result) return false; } return idx == 0 || result; } @VisibleForTesting protected static ListMultimap<ColumnDefinition, Expression> analyzeGroup(QueryController controller, OperationType op, List<RowFilter.Expression> expressions) { ListMultimap<ColumnDefinition, Expression> analyzed = ArrayListMultimap.create(); // sort all of the expressions in the operation by name and priority of the logical operator // this gives us an efficient way to handle inequality and combining into ranges without extra processing // and converting expressions from one type to another. Collections.sort(expressions, (a, b) -> { int cmp = a.column().compareTo(b.column()); return cmp == 0 ? -Integer.compare(getPriority(a.operator()), getPriority(b.operator())) : cmp; }); for (final RowFilter.Expression e : expressions) { ColumnIndex columnIndex = controller.getIndex(e); List<Expression> perColumn = analyzed.get(e.column()); if (columnIndex == null) columnIndex = new ColumnIndex(controller.getKeyValidator(), e.column(), null); AbstractAnalyzer analyzer = columnIndex.getAnalyzer(); analyzer.reset(e.getIndexValue().duplicate()); // EQ/LIKE_*/NOT_EQ can have multiple expressions e.g. text = "Hello World", // becomes text = "Hello" OR text = "World" because "space" is always interpreted as a split point (by analyzer), // NOT_EQ is made an independent expression only in case of pre-existing multiple EQ expressions, or // if there is no EQ operations and NOT_EQ is met or a single NOT_EQ expression present, // in such case we know exactly that there would be no more EQ/RANGE expressions for given column // since NOT_EQ has the lowest priority. boolean isMultiExpression = false; switch (e.operator()) { case EQ: isMultiExpression = false; break; case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: case LIKE_MATCHES: isMultiExpression = true; break; case NEQ: isMultiExpression = (perColumn.size() == 0 || perColumn.size() > 1 || (perColumn.size() == 1 && perColumn.get(0).getOp() == Op.NOT_EQ)); break; } if (isMultiExpression) { while (analyzer.hasNext()) { final ByteBuffer token = analyzer.next(); perColumn.add(new Expression(controller, columnIndex).add(e.operator(), token)); } } else // "range" or not-equals operator, combines both bounds together into the single expression, // iff operation of the group is AND, otherwise we are forced to create separate expressions, // not-equals is combined with the range iff operator is AND. { Expression range; if (perColumn.size() == 0 || op != OperationType.AND) perColumn.add((range = new Expression(controller, columnIndex))); else range = Iterables.getLast(perColumn); while (analyzer.hasNext()) range.add(e.operator(), analyzer.next()); } } return analyzed; } private static int getPriority(Operator op) { switch (op) { case EQ: return 5; case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: case LIKE_MATCHES: return 4; case GTE: case GT: return 3; case LTE: case LT: return 2; case NEQ: return 1; default: return 0; } } protected Token computeNext() { return range != null && range.hasNext() ? range.next() : endOfData(); } protected void performSkipTo(Long nextToken) { if (range != null) range.skipTo(nextToken); } public void close() throws IOException { controller.releaseIndexes(this); } public static class Builder { private final QueryController controller; protected final OperationType op; protected final List<RowFilter.Expression> expressions; protected Builder left, right; public Builder(OperationType operation, QueryController controller, RowFilter.Expression... columns) { this.op = operation; this.controller = controller; this.expressions = new ArrayList<>(); Collections.addAll(expressions, columns); } public Builder setRight(Builder operation) { this.right = operation; return this; } public Builder setLeft(Builder operation) { this.left = operation; return this; } public void add(RowFilter.Expression e) { expressions.add(e); } public void add(Collection<RowFilter.Expression> newExpressions) { if (expressions != null) expressions.addAll(newExpressions); } public Operation complete() { if (!expressions.isEmpty()) { ListMultimap<ColumnDefinition, Expression> analyzedExpressions = analyzeGroup(controller, op, expressions); RangeIterator.Builder<Long, Token> range = controller.getIndexes(op, analyzedExpressions.values()); Operation rightOp = null; if (right != null) { rightOp = right.complete(); range.add(rightOp); } return new Operation(op, controller, analyzedExpressions, range.build(), null, rightOp); } else { Operation leftOp = null, rightOp = null; boolean leftIndexes = false, rightIndexes = false; if (left != null) { leftOp = left.complete(); leftIndexes = leftOp != null && leftOp.range != null; } if (right != null) { rightOp = right.complete(); rightIndexes = rightOp != null && rightOp.range != null; } RangeIterator<Long, Token> join; /** * Operation should allow one of it's sub-trees to wrap no indexes, that is related to the fact that we * have to accept defined-but-not-indexed columns as well as key range as IndexExpressions. * * Two cases are possible: * * only left child produced indexed iterators, that could happen when there are two columns * or key range on the right: * * AND * / \ * OR \ * / \ AND * a b / \ * key key * * only right child produced indexed iterators: * * AND * / \ * AND a * / \ * key key */ if (leftIndexes && !rightIndexes) join = leftOp; else if (!leftIndexes && rightIndexes) join = rightOp; else if (leftIndexes) { RangeIterator.Builder<Long, Token> builder = op == OperationType.OR ? RangeUnionIterator.<Long, Token>builder() : RangeIntersectionIterator.<Long, Token>builder(); join = builder.add(leftOp).add(rightOp).build(); } else throw new AssertionError("both sub-trees have 0 indexes."); return new Operation(op, controller, null, join, leftOp, rightOp); } } } }