/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.sequence;
import java.util.List;
import org.apache.commons.collections4.Equator;
import org.apache.commons.collections4.functors.DefaultEquator;
This class allows to compare two objects sequences.
The two sequences can hold any object type, as only the equals
method is used to compare the elements of the sequences. It is guaranteed
that the comparisons will always be done as o1.equals(o2)
where
o1
belongs to the first sequence and o2
belongs to
the second sequence. This can be important if subclassing is used for some
elements in the first sequence and the equals
method is
specialized.
Comparison can be seen from two points of view: either as giving the smallest
modification allowing to transform the first sequence into the second one, or
as giving the longest sequence which is a subsequence of both initial
sequences. The equals
method is used to compare objects, so any
object can be put into sequences. Modifications include deleting, inserting
or keeping one object, starting from the beginning of the first sequence.
This class implements the comparison algorithm, which is the very efficient
algorithm from Eugene W. Myers
An O(ND) Difference Algorithm and Its Variations. This algorithm produces the shortest possible edit script
containing all the commands
needed to transform the first sequence into the second one.
See Also: Since: 4.0
/**
* This class allows to compare two objects sequences.
* <p>
* The two sequences can hold any object type, as only the <code>equals</code>
* method is used to compare the elements of the sequences. It is guaranteed
* that the comparisons will always be done as <code>o1.equals(o2)</code> where
* <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to
* the second sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method is
* specialized.
* </p>
* <p>
* Comparison can be seen from two points of view: either as giving the smallest
* modification allowing to transform the first sequence into the second one, or
* as giving the longest sequence which is a subsequence of both initial
* sequences. The <code>equals</code> method is used to compare objects, so any
* object can be put into sequences. Modifications include deleting, inserting
* or keeping one object, starting from the beginning of the first sequence.
* </p>
* <p>
* This class implements the comparison algorithm, which is the very efficient
* algorithm from Eugene W. Myers
* <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
* the shortest possible
* {@link EditScript edit script}
* containing all the
* {@link EditCommand commands}
* needed to transform the first sequence into the second one.
* </p>
*
* @see EditScript
* @see EditCommand
* @see CommandVisitor
*
* @since 4.0
*/
public class SequencesComparator<T> {
First sequence. /** First sequence. */
private final List<T> sequence1;
Second sequence. /** Second sequence. */
private final List<T> sequence2;
The equator used for testing object equality. /** The equator used for testing object equality. */
private final Equator<? super T> equator;
Temporary variables. /** Temporary variables. */
private final int[] vDown;
private final int[] vUp;
Simple constructor.
Creates a new instance of SequencesComparator using a DefaultEquator
.
It is guaranteed that the comparisons will always be done as
o1.equals(o2)
where o1
belongs to the first
sequence and o2
belongs to the second sequence. This can be
important if subclassing is used for some elements in the first sequence
and the equals
method is specialized.
Params: - sequence1 – first sequence to be compared
- sequence2 – second sequence to be compared
/**
* Simple constructor.
* <p>
* Creates a new instance of SequencesComparator using a {@link DefaultEquator}.
* <p>
* It is <em>guaranteed</em> that the comparisons will always be done as
* <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
* sequence and <code>o2</code> belongs to the second sequence. This can be
* important if subclassing is used for some elements in the first sequence
* and the <code>equals</code> method is specialized.
*
* @param sequence1 first sequence to be compared
* @param sequence2 second sequence to be compared
*/
public SequencesComparator(final List<T> sequence1, final List<T> sequence2) {
this(sequence1, sequence2, DefaultEquator.defaultEquator());
}
Simple constructor.
Creates a new instance of SequencesComparator with a custom Equator
.
It is guaranteed that the comparisons will always be done as
Equator.equate(o1, o2)
where o1
belongs to the first
sequence and o2
belongs to the second sequence.
Params: - sequence1 – first sequence to be compared
- sequence2 – second sequence to be compared
- equator – the equator to use for testing object equality
/**
* Simple constructor.
* <p>
* Creates a new instance of SequencesComparator with a custom {@link Equator}.
* <p>
* It is <em>guaranteed</em> that the comparisons will always be done as
* <code>Equator.equate(o1, o2)</code> where <code>o1</code> belongs to the first
* sequence and <code>o2</code> belongs to the second sequence.
*
* @param sequence1 first sequence to be compared
* @param sequence2 second sequence to be compared
* @param equator the equator to use for testing object equality
*/
public SequencesComparator(final List<T> sequence1, final List<T> sequence2, final Equator<? super T> equator) {
this.sequence1 = sequence1;
this.sequence2 = sequence2;
this.equator = equator;
final int size = sequence1.size() + sequence2.size() + 2;
vDown = new int[size];
vUp = new int[size];
}
Get the EditScript
object. It is guaranteed that the objects embedded in the
insert commands
come from the second sequence and that the objects embedded in either the delete commands
or keep commands
come from the first sequence. This can be important if subclassing is used for some elements in the first sequence and the equals
method is specialized.
Returns: the edit script resulting from the comparison of the two
sequences
/**
* Get the {@link EditScript} object.
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand
* insert commands} come from the second sequence and that the objects
* embedded in either the {@link DeleteCommand delete commands} or
* {@link KeepCommand keep commands} come from the first sequence. This can
* be important if subclassing is used for some elements in the first
* sequence and the <code>equals</code> method is specialized.
*
* @return the edit script resulting from the comparison of the two
* sequences
*/
public EditScript<T> getScript() {
final EditScript<T> script = new EditScript<>();
buildScript(0, sequence1.size(), 0, sequence2.size(), script);
return script;
}
Build a snake.
Params: - start – the value of the start of the snake
- diag – the value of the diagonal of the snake
- end1 – the value of the end of the first sequence to be compared
- end2 – the value of the end of the second sequence to be compared
Returns: the snake built
/**
* Build a snake.
*
* @param start the value of the start of the snake
* @param diag the value of the diagonal of the snake
* @param end1 the value of the end of the first sequence to be compared
* @param end2 the value of the end of the second sequence to be compared
* @return the snake built
*/
private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
int end = start;
while (end - diag < end2
&& end < end1
&& equator.equate(sequence1.get(end), sequence2.get(end - diag))) {
++end;
}
return new Snake(start, end, diag);
}
Get the middle snake corresponding to two subsequences of the
main sequences.
The snake is found using the MYERS Algorithm (this algorithms has
also been implemented in the GNU diff program). This algorithm is
explained in Eugene Myers article:
An O(ND) Difference Algorithm and Its Variations.
Params: - start1 – the begin of the first sequence to be compared
- end1 – the end of the first sequence to be compared
- start2 – the begin of the second sequence to be compared
- end2 – the end of the second sequence to be compared
Returns: the middle snake
/**
* Get the middle snake corresponding to two subsequences of the
* main sequences.
* <p>
* The snake is found using the MYERS Algorithm (this algorithms has
* also been implemented in the GNU diff program). This algorithm is
* explained in Eugene Myers article:
* <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>.
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @return the middle snake
*/
private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) {
// Myers Algorithm
// Initialisations
final int m = end1 - start1;
final int n = end2 - start2;
if (m == 0 || n == 0) {
return null;
}
final int delta = m - n;
final int sum = n + m;
final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
vDown[1+offset] = start1;
vUp[1+offset] = end1 + 1;
for (int d = 0; d <= offset ; ++d) {
// Down
for (int k = -d; k <= d; k += 2) {
// First step
final int i = k + offset;
if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
vDown[i] = vDown[i+1];
} else {
vDown[i] = vDown[i-1] + 1;
}
int x = vDown[i];
int y = x - start1 + start2 - k;
while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) {
vDown[i] = ++x;
++y;
}
// Second step
if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
if (vUp[i-delta] <= vDown[i]) { // NOPMD
return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
}
}
}
// Up
for (int k = delta - d; k <= delta + d; k += 2) {
// First step
final int i = k + offset - delta;
if (k == delta - d
|| k != delta + d && vUp[i+1] <= vUp[i-1]) {
vUp[i] = vUp[i+1] - 1;
} else {
vUp[i] = vUp[i-1];
}
int x = vUp[i] - 1;
int y = x - start1 + start2 - k;
while (x >= start1 && y >= start2
&& equator.equate(sequence1.get(x), sequence2.get(y))) {
vUp[i] = x--;
y--;
}
// Second step
if (delta % 2 == 0 && -d <= k && k <= d ) {
if (vUp[i] <= vDown[i + delta]) { // NOPMD
return buildSnake(vUp[i], k + start1 - start2, end1, end2);
}
}
}
}
// this should not happen
throw new RuntimeException("Internal Error");
}
Build an edit script.
Params: - start1 – the begin of the first sequence to be compared
- end1 – the end of the first sequence to be compared
- start2 – the begin of the second sequence to be compared
- end2 – the end of the second sequence to be compared
- script – the edited script
/**
* Build an edit script.
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @param script the edited script
*/
private void buildScript(final int start1, final int end1, final int start2, final int end2,
final EditScript<T> script) {
final Snake middle = getMiddleSnake(start1, end1, start2, end2);
if (middle == null
|| middle.getStart() == end1 && middle.getDiag() == end1 - end2
|| middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
int i = start1;
int j = start2;
while (i < end1 || j < end2) {
if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) {
script.append(new KeepCommand<>(sequence1.get(i)));
++i;
++j;
} else {
if (end1 - start1 > end2 - start2) {
script.append(new DeleteCommand<>(sequence1.get(i)));
++i;
} else {
script.append(new InsertCommand<>(sequence2.get(j)));
++j;
}
}
}
} else {
buildScript(start1, middle.getStart(),
start2, middle.getStart() - middle.getDiag(),
script);
for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
script.append(new KeepCommand<>(sequence1.get(i)));
}
buildScript(middle.getEnd(), end1,
middle.getEnd() - middle.getDiag(), end2,
script);
}
}
This class is a simple placeholder to hold the end part of a path under construction in a SequencesComparator
. /**
* This class is a simple placeholder to hold the end part of a path
* under construction in a {@link SequencesComparator SequencesComparator}.
*/
private static class Snake {
Start index. /** Start index. */
private final int start;
End index. /** End index. */
private final int end;
Diagonal number. /** Diagonal number. */
private final int diag;
Simple constructor. Creates a new instance of Snake with specified indices.
Params: - start – start index of the snake
- end – end index of the snake
- diag – diagonal number
/**
* Simple constructor. Creates a new instance of Snake with specified indices.
*
* @param start start index of the snake
* @param end end index of the snake
* @param diag diagonal number
*/
public Snake(final int start, final int end, final int diag) {
this.start = start;
this.end = end;
this.diag = diag;
}
Get the start index of the snake.
Returns: start index of the snake
/**
* Get the start index of the snake.
*
* @return start index of the snake
*/
public int getStart() {
return start;
}
Get the end index of the snake.
Returns: end index of the snake
/**
* Get the end index of the snake.
*
* @return end index of the snake
*/
public int getEnd() {
return end;
}
Get the diagonal number of the snake.
Returns: diagonal number of the snake
/**
* Get the diagonal number of the snake.
*
* @return diagonal number of the snake
*/
public int getDiag() {
return diag;
}
}
}