org.apache.commons/commons-math3/3.6.1 : org/apache/commons/math3/stat/inference/MannWhitneyUTest.java

MannWhitneyUTest

http://commons.apache.org/proper/commons-math/: The Apache Commons Math project is a library of lightweight, self-contained mathematics and statistics components addressing the most common practical problems not immediately available in the Java programming language or commons-lang. (The Apache Software Foundation)

Apache License, Version 2.0

Eldar Agalarov
Tim Allison
C. Scott Ananian
Mark Anderson
Peter Andrews
Rémi Arntzen
Matt Adereth
Jared Becksfort
Michael Bjorkegren
Brian Bloniarz
John Bollinger
Cyril Briquet
Dave Brosius
Dan Checkoway
Anders Conbere
Charles Cooper
Paul Cowan
Benjamin Croizet
Larry Diamond
Aleksei Dievskii
Rodrigo di Lorenzo Lopes
Hasan Diwan
Ted Dunning
Ole Ersoy
Ajo Fod
John Gant
Ken Geis
Hank Grabowski
Bernhard Grünewaldt
Elliotte Rusty Harold
Dennis Hendriks
Reid Hochstedler
Matthias Hummel
Curtis Jensen
Bruce A Johnson
Ismael Juma
Eugene Kirpichov
Oleksandr Kornieiev
Piotr Kochanski
Sergei Lebedev
Bob MacCallum
Jake Mannix
Benjamin McCann
Patrick Meyer
J. Lewis Muir
Venkatesha Murthy
Christopher Nix
Fredrik Norin
Sean Owen
Sujit Pal
Todd C. Parnell
Andreas Rieger
Sébastien Riou
Bill Rossi
Matthew Rowles
Pavel Ryzhov
Joni Salonen
Michael Saunders
Thorsten Schaefer
Christopher Schuck
Christian Semrau
David Stefka
Mauro Talevi
Radoslav Tsvetkov
Kim van der Linde
Alexey Volkov
Andrew Waterman
Jörg Weimar
Christian Winter
Piotr Wydrych
Xiaogang Zhang
Chris Popp

Mikkel Meyer Andersen
Bill Barker
Sébastien Brisard
Albert Davidson Chou
Mark Diggory
Robert Burrell Donkin
Otmar Ertl
Luc Maisonobe
Tim O'Brien
J. Pietschmann
Dimitri Pourbaix
Gilles Sadowski
Greg Sterijevski
Brent Worden
Thomas Neidhart
Evan Ward

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.inference;

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.MaxCountExceededException;
import org.apache.commons.math3.exception.NoDataException;
import org.apache.commons.math3.exception.NullArgumentException;
import org.apache.commons.math3.stat.ranking.NaNStrategy;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.apache.commons.math3.stat.ranking.TiesStrategy;
import org.apache.commons.math3.util.FastMath;

An implementation of the Mann-Whitney U test (also called Wilcoxon rank-sum test).
/**
 * An implementation of the Mann-Whitney U test (also called Wilcoxon rank-sum test).
 *
 */
public class MannWhitneyUTest {

    Ranking algorithm. /** Ranking algorithm. */
    // Assigned exactly once by each constructor and never reassigned
    // afterwards, hence final.
    private final NaturalRanking naturalRanking;

    Create a test instance in which NaNs are left in place and ties get
the average of the applicable ranks. Use this unless you are very sure of
what you are doing.
/**
     * Create a test instance in which NaNs are left in place and ties get
     * the average of the applicable ranks. Use this unless you are very sure of
     * what you are doing.
     */
    public MannWhitneyUTest() {
        // Default policy: NaN values are left in place (FIXED) and tied
        // observations receive the average of the ranks they cover (AVERAGE).
        this.naturalRanking = new NaturalRanking(NaNStrategy.FIXED, TiesStrategy.AVERAGE);
    }

    Create a test instance using the given strategies for NaN's and ties.
Only use this if you are sure of what you are doing.
Params: nanStrategy – 
           specifies the strategy that should be used for Double.NaN's
tiesStrategy – 
           specifies the strategy that should be used for ties/**
     * Create a test instance using the given strategies for NaN's and ties.
     * Only use this if you are sure of what you are doing.
     *
     * @param nanStrategy
     *            specifies the strategy that should be used for Double.NaN's
     * @param tiesStrategy
     *            specifies the strategy that should be used for ties
     */
    public MannWhitneyUTest(final NaNStrategy nanStrategy, final TiesStrategy tiesStrategy) {
        // Both strategies are handed to the ranking algorithm as given;
        // this constructor performs no checks of its own on them.
        this.naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
    }

    Ensures that the provided arrays fulfill the assumptions.
Params: x – first sample
y – second sample
Throws: NullArgumentException – if x or y are null.
NoDataException – if x or y are zero-length./**
     * Ensures that the provided arrays fulfill the assumptions.
     *
     * @param x first sample
     * @param y second sample
     * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
     * @throws NoDataException if {@code x} or {@code y} are zero-length.
     */
    private void ensureDataConformance(final double[] x, final double[] y)
        throws NullArgumentException, NoDataException {

        // Null checks come first for both samples so that a null argument is
        // always reported as NullArgumentException, never as NoDataException.
        if (x == null) {
            throw new NullArgumentException();
        }
        if (y == null) {
            throw new NullArgumentException();
        }

        // Both references are non-null here; reject empty samples.
        if (x.length == 0) {
            throw new NoDataException();
        }
        if (y.length == 0) {
            throw new NoDataException();
        }
    }

    Concatenate the samples into one array.
Params: x – first sample
y – second sample
Returns: concatenated array/** Concatenate the samples into one array.
     * @param x first sample
     * @param y second sample
     * @return concatenated array
     */
    private double[] concatenateSamples(final double[] x, final double[] y) {
        final int combinedLength = x.length + y.length;

        // Start from a widened copy of x (the extra tail is zero-filled),
        // then overwrite that tail with the contents of y.
        final double[] combined = java.util.Arrays.copyOf(x, combinedLength);
        System.arraycopy(y, 0, combined, x.length, y.length);

        return combined;
    }

    Computes the  Mann-Whitney
U statistic comparing mean for two independent samples possibly of
different length.

This statistic can be used to perform a Mann-Whitney U test evaluating
the null hypothesis that the two independent samples have equal means.


Let X_i denote the i'th individual of the first sample and
Y_j the j'th individual in the second sample. Note that the
samples would often have different length.


Preconditions:

All observations in the two samples are independent.
The observations are at least ordinal (continuous are also ordinal).


Params: x – the first sample
y – the second sample
Throws: NullArgumentException – if x or y are null.
NoDataException – if x or y are zero-length.
Returns: Mann-Whitney U statistic (maximum of U^x and U^y)/**
     * Computes the <a
     * href="http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U"> Mann-Whitney
     * U statistic</a> comparing mean for two independent samples possibly of
     * different length.
     * <p>
     * This statistic can be used to perform a Mann-Whitney U test evaluating
     * the null hypothesis that the two independent samples have equal means.
     * </p>
     * <p>
     * Let X<sub>i</sub> denote the i'th individual of the first sample and
     * Y<sub>j</sub> the j'th individual in the second sample. Note that the
     * samples would often have different length.
     * </p>
     * <p>
     * <strong>Preconditions</strong>:
     * <ul>
     * <li>All observations in the two samples are independent.</li>
     * <li>The observations are at least ordinal (continuous are also ordinal).</li>
     * </ul>
     * </p>
     *
     * @param x the first sample
     * @param y the second sample
     * @return Mann-Whitney U statistic (maximum of U<sup>x</sup> and U<sup>y</sup>)
     * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
     * @throws NoDataException if {@code x} or {@code y} are zero-length.
     */
    public double mannWhitneyU(final double[] x, final double[] y)
        throws NullArgumentException, NoDataException {

        ensureDataConformance(x, y);

        final int nx = x.length;
        final int ny = y.length;

        // Rank the pooled sample. x fills the first nx slots of the pooled
        // array, so the ranks of x are the first nx entries of the result.
        final double[] pooledRanks = naturalRanking.rank(concatenateSamples(x, y));

        double rankSumX = 0;
        for (int k = 0; k < nx; k++) {
            rankSumX += pooledRanks[k];
        }

        // U_x = R_x - n_x * (n_x + 1) / 2, where R_x is the rank sum of x.
        // The product is formed in long arithmetic to avoid int overflow;
        // n_x * (n_x + 1) is always even, so the integer division is exact.
        final long rankSumOffset = ((long) nx * (nx + 1)) / 2;
        final double uX = rankSumX - rankSumOffset;

        // The two statistics always add up to n_x * n_y.
        final double uY = (long) nx * ny - uX;

        return FastMath.max(uX, uY);
    }

    Params: Umin – smallest Mann-Whitney U value
n1 – number of subjects in first sample
n2 – number of subjects in second sample
Throws: ConvergenceException – if the p-value can not be computed
due to a convergence error
MaxCountExceededException – if the maximum number of
iterations is exceeded
Returns: two-sided asymptotic p-value/**
     * @param Umin smallest Mann-Whitney U value
     * @param n1 number of subjects in first sample
     * @param n2 number of subjects in second sample
     * @return two-sided asymptotic p-value
     * @throws ConvergenceException if the p-value can not be computed
     * due to a convergence error
     * @throws MaxCountExceededException if the maximum number of
     * iterations is exceeded
     */
    private double calculateAsymptoticPValue(final double Umin,
                                             final int n1,
                                             final int n2)
        throws ConvergenceException, MaxCountExceededException {

        /* The product of two ints always fits in a long, so this
         * multiplication cannot overflow.
         */
        final long n1n2prod = (long) n1 * n2;

        // http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U#Normal_approximation
        final double EU = n1n2prod / 2.0;

        /* The variance must NOT be evaluated in integer arithmetic:
         *  - n1 + n2 + 1 overflows int once n1 + n2 reaches Integer.MAX_VALUE;
         *  - n1n2prod * (n1 + n2 + 1) overflows long for large samples, e.g.
         *    n1 = n2 = 2^21 gives 2^42 * (2^22 + 1) > 2^63, which silently
         *    wraps around and yields a wrong variance.
         * Promoting the second factor to double keeps the whole expression in
         * floating point. The result is unchanged whenever the exact product
         * fits in 53 bits, and the relative rounding error beyond that is
         * negligible for a normal approximation.
         */
        final double VarU = n1n2prod * ((double) n1 + n2 + 1) / 12.0;

        // Standardize Umin against the null distribution of U.
        final double z = (Umin - EU) / FastMath.sqrt(VarU);

        // No try-catch or advertised exception because args are valid
        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
        final NormalDistribution standardNormal = new NormalDistribution(null, 0, 1);

        // Two-sided p-value: twice the lower-tail probability at z.
        return 2 * standardNormal.cumulativeProbability(z);
    }

    Returns the asymptotic observed significance level, or 
p-value, associated with a  Mann-Whitney
U statistic comparing mean for two independent samples.

Let X_i denote the i'th individual of the first sample and
Y_j the j'th individual in the second sample. Note that the
samples would often have different length.


Preconditions:

All observations in the two samples are independent.
The observations are at least ordinal (continuous are also ordinal).


Ties give rise to biased variance at the moment. See e.g. http://mlsc.lboro.ac.uk/resources/statistics/Mannwhitney.pdf.
Params: x – the first sample
y – the second sample
Throws: NullArgumentException – if x or y are null.
NoDataException – if x or y are zero-length.
ConvergenceException – if the p-value can not be computed due to a
convergence error
MaxCountExceededException – if the maximum number of iterations
is exceeded
Returns: asymptotic p-value/**
     * Returns the asymptotic <i>observed significance level</i>, or <a href=
     * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
     * p-value</a>, associated with a <a
     * href="http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U"> Mann-Whitney
     * U statistic</a> comparing mean for two independent samples.
     * <p>
     * Let X<sub>i</sub> denote the i'th individual of the first sample and
     * Y<sub>j</sub> the j'th individual in the second sample. Note that the
     * samples would often have different length.
     * </p>
     * <p>
     * <strong>Preconditions</strong>:
     * <ul>
     * <li>All observations in the two samples are independent.</li>
     * <li>The observations are at least ordinal (continuous are also ordinal).</li>
     * </ul>
     * </p><p>
     * Ties give rise to biased variance at the moment. See e.g. <a
     * href="http://mlsc.lboro.ac.uk/resources/statistics/Mannwhitney.pdf"
     * >http://mlsc.lboro.ac.uk/resources/statistics/Mannwhitney.pdf</a>.</p>
     *
     * @param x the first sample
     * @param y the second sample
     * @return asymptotic p-value
     * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
     * @throws NoDataException if {@code x} or {@code y} are zero-length.
     * @throws ConvergenceException if the p-value can not be computed due to a
     * convergence error
     * @throws MaxCountExceededException if the maximum number of iterations
     * is exceeded
     */
    public double mannWhitneyUTest(final double[] x, final double[] y)
        throws NullArgumentException, NoDataException,
        ConvergenceException, MaxCountExceededException {

        ensureDataConformance(x, y);

        final int n1 = x.length;
        final int n2 = y.length;

        // mannWhitneyU returns the larger of the two U statistics.
        final double largerU = mannWhitneyU(x, y);

        // Since U1 + U2 = n1 * n2, the smaller statistic is the remainder;
        // the product is formed in long arithmetic to avoid int overflow.
        final double smallerU = (long) n1 * n2 - largerU;

        return calculateAsymptoticPValue(smallerU, n1, n2);
    }

}

Params:	x – first sample y – second sample
Throws:	NullArgumentException – if `x` or `y` are `null`. NoDataException – if `x` or `y` are zero-length.

Params:	x – first sample y – second sample
Returns:	concatenated array

Params:	x – the first sample y – the second sample
Throws:	NullArgumentException – if `x` or `y` are `null`. NoDataException – if `x` or `y` are zero-length.
Returns:	Mann-Whitney U statistic (maximum of U^x and U^y)

Params:	Umin – smallest Mann-Whitney U value n1 – number of subjects in first sample n2 – number of subjects in second sample
Throws:	ConvergenceException – if the p-value can not be computed due to a convergence error MaxCountExceededException – if the maximum number of iterations is exceeded
Returns:	two-sided asymptotic p-value

/

org.apache.commons/ commons-math3/ 3.6.1/ org/apache/commons/math3/stat/inference/MannWhitneyUTest.java