/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.optimization.general;

import java.util.Arrays;

import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.optimization.PointVectorValuePair;
import org.apache.commons.math3.optimization.ConvergenceChecker;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.FastMath;


/**
 * This class solves a least squares problem using the Levenberg-Marquardt algorithm.
 *
 * <p>This implementation <em>should</em> work even for over-determined systems
 * (i.e. systems having more points than equations). Over-determined systems
 * are solved by ignoring the points which have the smallest impact according
 * to their jacobian column norm. Only the rank of the matrix and some loop bounds
 * are changed to implement this.</p>
 *
 * <p>The resolution engine is a simple translation of the MINPACK <a
 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor
 * changes. The changes include the over-determined resolution, the use of
 * inherited convergence checker and the Q.R. decomposition which has been
 * rewritten following the algorithm described in the
 * P. Lascaux and R. Theodor book <i>Analyse num&eacute;rique matricielle
 * appliqu&eacute;e &agrave; l'art de l'ing&eacute;nieur</i>, Masson 1986.</p>
 * <p>The authors of the original fortran version are:
 * <ul>
 *  <li>Argonne National Laboratory. MINPACK project. March 1980</li>
 *  <li>Burton S. Garbow</li>
 *  <li>Kenneth E. Hillstrom</li>
 *  <li>Jorge J. More</li>
 * </ul>
 * The redistribution policy for MINPACK is available <a
 * href="http://www.netlib.org/minpack/disclaimer">here</a>; for convenience, it
 * is reproduced below.</p>
 *
 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0">
 * <tr><td>
 *    Minpack Copyright Notice (1999) University of Chicago.
 *    All rights reserved
 * </td></tr>
 * <tr><td>
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * <ol>
 *  <li>Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.</li>
 *  <li>Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.</li>
 *  <li>The end-user documentation included with the redistribution, if any,
 *      must include the following acknowledgment:
 *      <code>This product includes software developed by the University of
 *            Chicago, as Operator of Argonne National Laboratory.</code>
 *      Alternately, this acknowledgment may appear in the software itself,
 *      if and wherever such third-party acknowledgments normally appear.</li>
 *  <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS"
 *      WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE
 *      UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND
 *      THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR
 *      IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES
 *      OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE
 *      OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY
 *      OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR
 *      USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF
 *      THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4)
 *      DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION
 *      UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL
 *      BE CORRECTED.</strong></li>
 *  <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT
 *      HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF
 *      ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT,
 *      INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF
 *      ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF
 *      PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER
 *      SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
 *      (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
 *      EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
 *      POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li>
 * </ol></td></tr>
 * </table>
 *
 * @deprecated As of 3.1 (to be removed in 4.0).
 * @since 2.0
 */
@Deprecated
public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer {
    /** Number of solved points. */
    private int solvedCols;
    /** Diagonal elements of the R matrix in the Q.R. decomposition. */
    private double[] diagR;
    /** Norms of the columns of the jacobian matrix. */
    private double[] jacNorm;
    /** Coefficients of the Householder transform vectors. */
    private double[] beta;
    /** Columns permutation array. */
    private int[] permutation;
    /** Rank of the jacobian matrix. */
    private int rank;
    /** Levenberg-Marquardt parameter. */
    private double lmPar;
    /** Parameters evolution direction associated with lmPar. */
    private double[] lmDir;
    /** Positive input variable used in determining the initial step bound. */
    private final double initialStepBoundFactor;
    /** Desired relative error in the sum of squares. */
    private final double costRelativeTolerance;
    /** Desired relative error in the approximate solution parameters. */
    private final double parRelativeTolerance;
    /** Desired max cosine on the orthogonality between the function vector
     * and the columns of the jacobian. */
    private final double orthoTolerance;
    /** Threshold for QR ranking. */
    private final double qrRankingThreshold;
    /** Weighted residuals. */
    private double[] weightedResidual;
    /** Weighted Jacobian. */
    private double[][] weightedJacobian;
    /**
     * Build an optimizer for least squares problems with default values
     * for all the tuning parameters (see the {@link
     * #LevenbergMarquardtOptimizer(double,double,double,double,double)
     * other constructor}).
     * The default values for the algorithm settings are:
     * <ul>
     *  <li>Initial step bound factor: 100</li>
     *  <li>Cost relative tolerance: 1e-10</li>
     *  <li>Parameters relative tolerance: 1e-10</li>
     *  <li>Orthogonality tolerance: 1e-10</li>
     *  <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li>
     * </ul>
     */
    public LevenbergMarquardtOptimizer() {
        this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN);
    }
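    // Illustrative usage sketch (not part of the original source, kept as a comment).
    // It assumes the pre-3.1 optimize(int, DifferentiableMultivariateVectorFunction,
    // double[], double[], double[]) entry point inherited from the deprecated vector
    // optimizer hierarchy, and fits the hypothetical straight-line model y = a*x + b
    // to four made-up observations:
    //
    //   DifferentiableMultivariateVectorFunction model =
    //       new DifferentiableMultivariateVectorFunction() {
    //           private final double[] x = { 0, 1, 2, 3 };
    //           public double[] value(double[] p) {
    //               final double[] y = new double[x.length];
    //               for (int i = 0; i < x.length; i++) {
    //                   y[i] = p[0] * x[i] + p[1];
    //               }
    //               return y;
    //           }
    //           public MultivariateMatrixFunction jacobian() {
    //               return new MultivariateMatrixFunction() {
    //                   public double[][] value(double[] p) {
    //                       final double[][] j = new double[x.length][2];
    //                       for (int i = 0; i < x.length; i++) {
    //                           j[i][0] = x[i]; // derivative with respect to a
    //                           j[i][1] = 1;    // derivative with respect to b
    //                       }
    //                       return j;
    //                   }
    //               };
    //           }
    //       };
    //   LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
    //   double[] target  = { 1.1, 2.9, 5.2, 7.1 };  // hypothetical observations
    //   double[] weights = { 1, 1, 1, 1 };
    //   double[] start   = { 0, 0 };                // initial guess for (a, b)
    //   PointVectorValuePair fit = optimizer.optimize(100, model, target, weights, start);
    //   double[] ab = fit.getPoint();               // fitted (a, b)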
    /**
     * Constructor that allows the specification of a custom convergence
     * checker.
     * Note that all the usual convergence checks will be <em>disabled</em>.
     * The default values for the algorithm settings are:
     * <ul>
     *  <li>Initial step bound factor: 100</li>
     *  <li>Cost relative tolerance: 1e-10</li>
     *  <li>Parameters relative tolerance: 1e-10</li>
     *  <li>Orthogonality tolerance: 1e-10</li>
     *  <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li>
     * </ul>
     *
     * @param checker Convergence checker.
     */
    public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
        this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN);
    }
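    // Illustrative sketch (not part of the original source, kept as a comment): with a
    // custom checker the usual MINPACK convergence tests are disabled, so convergence is
    // decided by the supplied checker alone. SimpleVectorValueChecker and its
    // two-threshold constructor are assumed here, with arbitrary example tolerances:
    //
    //   ConvergenceChecker<PointVectorValuePair> checker =
    //       new SimpleVectorValueChecker(1e-6, 1e-6);
    //   LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer(checker);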
    /**
     * Constructor that allows the specification of a custom convergence
     * checker, in addition to the standard ones.
     *
     * @param initialStepBoundFactor Positive input variable used in
     * determining the initial step bound. This bound is set to the
     * product of initialStepBoundFactor and the euclidean norm of
     * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor}
     * itself. In most cases factor should lie in the interval
     * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value.
     * @param checker Convergence checker.
     * @param costRelativeTolerance Desired relative error in the sum of
     * squares.
     * @param parRelativeTolerance Desired relative error in the approximate
     * solution parameters.
     * @param orthoTolerance Desired max cosine on the orthogonality between
     * the function vector and the columns of the Jacobian.
     * @param threshold Desired threshold for QR ranking. If the squared norm
     * of a column vector is smaller or equal to this threshold during QR
     * decomposition, it is considered to be a zero vector and hence the rank
     * of the matrix is reduced.
     */
    public LevenbergMarquardtOptimizer(double initialStepBoundFactor,
                                       ConvergenceChecker<PointVectorValuePair> checker,
                                       double costRelativeTolerance,
                                       double parRelativeTolerance,
                                       double orthoTolerance,
                                       double threshold) {
        super(checker);
        this.initialStepBoundFactor = initialStepBoundFactor;
        this.costRelativeTolerance = costRelativeTolerance;
        this.parRelativeTolerance = parRelativeTolerance;
        this.orthoTolerance = orthoTolerance;
        this.qrRankingThreshold = threshold;
    }
    /**
     * Build an optimizer for least squares problems with default values
     * for some of the tuning parameters (see the {@link
     * #LevenbergMarquardtOptimizer(double,double,double,double,double)
     * other constructor}).
     * The default values for the algorithm settings are:
     * <ul>
     *  <li>Initial step bound factor: 100</li>
     *  <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li>
     * </ul>
     *
     * @param costRelativeTolerance Desired relative error in the sum of
     * squares.
     * @param parRelativeTolerance Desired relative error in the approximate
     * solution parameters.
     * @param orthoTolerance Desired max cosine on the orthogonality between
     * the function vector and the columns of the Jacobian.
     */
    public LevenbergMarquardtOptimizer(double costRelativeTolerance,
                                       double parRelativeTolerance,
                                       double orthoTolerance) {
        this(100,
             costRelativeTolerance, parRelativeTolerance, orthoTolerance,
             Precision.SAFE_MIN);
    }
    /**
     * The arguments control the behaviour of the default convergence checking
     * procedure.
     * Additional criteria can be defined through the setting of a {@link
     * ConvergenceChecker}.
     *
     * @param initialStepBoundFactor Positive input variable used in
     * determining the initial step bound. This bound is set to the
     * product of initialStepBoundFactor and the euclidean norm of
     * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor}
     * itself. In most cases factor should lie in the interval
     * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value.
     * @param costRelativeTolerance Desired relative error in the sum of
     * squares.
     * @param parRelativeTolerance Desired relative error in the approximate
     * solution parameters.
     * @param orthoTolerance Desired max cosine on the orthogonality between
     * the function vector and the columns of the Jacobian.
     * @param threshold Desired threshold for QR ranking. If the squared norm
     * of a column vector is smaller or equal to this threshold during QR
     * decomposition, it is considered to be a zero vector and hence the rank
     * of the matrix is reduced.
     */
    public LevenbergMarquardtOptimizer(double initialStepBoundFactor,
                                       double costRelativeTolerance,
                                       double parRelativeTolerance,
                                       double orthoTolerance,
                                       double threshold) {
        super(null); // No custom convergence criterion.
        this.initialStepBoundFactor = initialStepBoundFactor;
        this.costRelativeTolerance = costRelativeTolerance;
        this.parRelativeTolerance = parRelativeTolerance;
        this.orthoTolerance = orthoTolerance;
        this.qrRankingThreshold = threshold;
    }
    /** {@inheritDoc} */
    @Override
    protected PointVectorValuePair doOptimize() {
        final int nR = getTarget().length; // Number of observed data.
        final double[] currentPoint = getStartPoint();
        final int nC = currentPoint.length; // Number of parameters.

        // arrays shared with the other private methods
        solvedCols  = FastMath.min(nR, nC);
        diagR       = new double[nC];
        jacNorm     = new double[nC];
        beta        = new double[nC];
        permutation = new int[nC];
        lmDir       = new double[nC];

        // local point
        double   delta = 0;
        double   xNorm = 0;
        double[] diag  = new double[nC];
        double[] oldX  = new double[nC];
        double[] oldRes = new double[nR];
        double[] oldObj = new double[nR];
        double[] qtf    = new double[nR];
        double[] work1  = new double[nC];
        double[] work2  = new double[nC];
        double[] work3  = new double[nC];

        final RealMatrix weightMatrixSqrt = getWeightSquareRoot();

        // Evaluate the function at the starting point and calculate its norm.
        double[] currentObjective = computeObjectiveValue(currentPoint);
        double[] currentResiduals = computeResiduals(currentObjective);
        PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective);
        double currentCost = computeCost(currentResiduals);

        // Outer loop.
        lmPar = 0;
        boolean firstIteration = true;
        int iter = 0;
        final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker();
        while (true) {
            ++iter;
            final PointVectorValuePair previous = current;

            // QR decomposition of the jacobian matrix
            qrDecomposition(computeWeightedJacobian(currentPoint));

            weightedResidual = weightMatrixSqrt.operate(currentResiduals);
            for (int i = 0; i < nR; i++) {
                qtf[i] = weightedResidual[i];
            }

            // compute Qt.res
            qTy(qtf);

            // now we don't need Q anymore,
            // so let jacobian contain the R matrix with its diagonal elements
            for (int k = 0; k < solvedCols; ++k) {
                int pk = permutation[k];
                weightedJacobian[k][pk] = diagR[pk];
            }

            if (firstIteration) {
                // scale the point according to the norms of the columns
                // of the initial jacobian
                xNorm = 0;
                for (int k = 0; k < nC; ++k) {
                    double dk = jacNorm[k];
                    if (dk == 0) {
                        dk = 1.0;
                    }
                    double xk = dk * currentPoint[k];
                    xNorm  += xk * xk;
                    diag[k] = dk;
                }
                xNorm = FastMath.sqrt(xNorm);

                // initialize the step bound delta
                delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm);
            }

            // check orthogonality between function vector and jacobian columns
            double maxCosine = 0;
            if (currentCost != 0) {
                for (int j = 0; j < solvedCols; ++j) {
                    int    pj = permutation[j];
                    double s  = jacNorm[pj];
                    if (s != 0) {
                        double sum = 0;
                        for (int i = 0; i <= j; ++i) {
                            sum += weightedJacobian[i][pj] * qtf[i];
                        }
                        maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost));
                    }
                }
            }
            if (maxCosine <= orthoTolerance) {
                // Convergence has been reached.
                setCost(currentCost);
                // Update (deprecated) "point" field.
                point = current.getPoint();
                return current;
            }

            // rescale if necessary
            for (int j = 0; j < nC; ++j) {
                diag[j] = FastMath.max(diag[j], jacNorm[j]);
            }

            // Inner loop.
            for (double ratio = 0; ratio < 1.0e-4;) {

                // save the state
                for (int j = 0; j < solvedCols; ++j) {
                    int pj = permutation[j];
                    oldX[pj] = currentPoint[pj];
                }
                final double previousCost = currentCost;
                double[] tmpVec = weightedResidual;
                weightedResidual = oldRes;
                oldRes = tmpVec;
                tmpVec = currentObjective;
                currentObjective = oldObj;
                oldObj = tmpVec;

                // determine the Levenberg-Marquardt parameter
                determineLMParameter(qtf, delta, diag, work1, work2, work3);

                // compute the new point and the norm of the evolution direction
                double lmNorm = 0;
                for (int j = 0; j < solvedCols; ++j) {
                    int pj = permutation[j];
                    lmDir[pj] = -lmDir[pj];
                    currentPoint[pj] = oldX[pj] + lmDir[pj];
                    double s = diag[pj] * lmDir[pj];
                    lmNorm  += s * s;
                }
                lmNorm = FastMath.sqrt(lmNorm);
                // on the first iteration, adjust the initial step bound.
                if (firstIteration) {
                    delta = FastMath.min(delta, lmNorm);
                }

                // Evaluate the function at x + p and calculate its norm.
                currentObjective = computeObjectiveValue(currentPoint);
                currentResiduals = computeResiduals(currentObjective);
                current = new PointVectorValuePair(currentPoint, currentObjective);
                currentCost = computeCost(currentResiduals);

                // compute the scaled actual reduction
                double actRed = -1.0;
                if (0.1 * currentCost < previousCost) {
                    double r = currentCost / previousCost;
                    actRed = 1.0 - r * r;
                }

                // compute the scaled predicted reduction
                // and the scaled directional derivative
                for (int j = 0; j < solvedCols; ++j) {
                    int pj = permutation[j];
                    double dirJ = lmDir[pj];
                    work1[j] = 0;
                    for (int i = 0; i <= j; ++i) {
                        work1[i] += weightedJacobian[i][pj] * dirJ;
                    }
                }
                double coeff1 = 0;
                for (int j = 0; j < solvedCols; ++j) {
                    coeff1 += work1[j] * work1[j];
                }
                double pc2 = previousCost * previousCost;
                coeff1 /= pc2;
                double coeff2 = lmPar * lmNorm * lmNorm / pc2;
                double preRed = coeff1 + 2 * coeff2;
                double dirDer = -(coeff1 + coeff2);

                // ratio of the actual to the predicted reduction
                ratio = (preRed == 0) ? 0 : (actRed / preRed);

                // update the step bound
                if (ratio <= 0.25) {
                    double tmp =
                        (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5;
                    if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) {
                        tmp = 0.1;
                    }
                    delta = tmp * FastMath.min(delta, 10.0 * lmNorm);
                    lmPar /= tmp;
                } else if ((lmPar == 0) || (ratio >= 0.75)) {
                    delta = 2 * lmNorm;
                    lmPar *= 0.5;
                }

                // test for successful iteration.
                if (ratio >= 1.0e-4) {
                    // successful iteration, update the norm
                    firstIteration = false;
                    xNorm = 0;
                    for (int k = 0; k < nC; ++k) {
                        double xK = diag[k] * currentPoint[k];
                        xNorm += xK * xK;
                    }
                    xNorm = FastMath.sqrt(xNorm);

                    // tests for convergence.
                    if (checker != null && checker.converged(iter, previous, current)) {
                        setCost(currentCost);
                        // Update (deprecated) "point" field.
                        point = current.getPoint();
                        return current;
                    }
                } else {
                    // failed iteration, reset the previous values
                    currentCost = previousCost;
                    for (int j = 0; j < solvedCols; ++j) {
                        int pj = permutation[j];
                        currentPoint[pj] = oldX[pj];
                    }
                    tmpVec = weightedResidual;
                    weightedResidual = oldRes;
                    oldRes = tmpVec;
                    tmpVec = currentObjective;
                    currentObjective = oldObj;
                    oldObj = tmpVec;
                    // Reset "current" to previous values.
                    current = new PointVectorValuePair(currentPoint, currentObjective);
                }

                // Default convergence criteria.
                if ((FastMath.abs(actRed) <= costRelativeTolerance &&
                     preRed <= costRelativeTolerance &&
                     ratio <= 2.0) ||
                    delta <= parRelativeTolerance * xNorm) {
                    setCost(currentCost);
                    // Update (deprecated) "point" field.
                    point = current.getPoint();
                    return current;
                }

                // tests for termination and stringent tolerances
                // (2.2204e-16 is the machine epsilon for IEEE754)
                if ((FastMath.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) &&
                    (ratio <= 2.0)) {
                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE,
                                                   costRelativeTolerance);
                } else if (delta <= 2.2204e-16 * xNorm) {
                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE,
                                                   parRelativeTolerance);
                } else if (maxCosine <= 2.2204e-16) {
                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE,
                                                   orthoTolerance);
                }
            }
        }
    }
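    // Reference note (added comment, not part of the original source): the inner loop of
    // doOptimize above implements the usual MINPACK trust-region bookkeeping. With
    // p = lmDir, D = diag, J the weighted jacobian and c0 = previousCost, it computes
    //   actRed = 1 - (currentCost / c0)^2                        (scaled actual reduction)
    //   preRed = (|J.p|^2 + 2 * lmPar * |D.p|^2) / c0^2          (scaled predicted reduction)
    //   ratio  = actRed / preRed
    // then shrinks the step bound delta when ratio <= 0.25, sets it to twice the scaled
    // step norm when lmPar == 0 or ratio >= 0.75, and accepts the step only when
    // ratio >= 1.0e-4.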
    /**
     * Determine the Levenberg-Marquardt parameter.
     * <p>This implementation is a translation in Java of the MINPACK
     * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a>
     * routine.</p>
     * <p>This method sets the lmPar and lmDir attributes.</p>
     * <p>The authors of the original fortran function are:</p>
     * <ul>
     *  <li>Argonne National Laboratory. MINPACK project. March 1980</li>
     *  <li>Burton S. Garbow</li>
     *  <li>Kenneth E. Hillstrom</li>
     *  <li>Jorge J. More</li>
     * </ul>
     * <p>Luc Maisonobe did the Java translation.</p>
     *
     * @param qy array containing qTy
     * @param delta upper bound on the euclidean norm of diagR * lmDir
     * @param diag diagonal matrix
     * @param work1 work array
     * @param work2 work array
     * @param work3 work array
     */
    private void determineLMParameter(double[] qy, double delta, double[] diag,
                                      double[] work1, double[] work2, double[] work3) {
        final int nC = weightedJacobian[0].length;

        // compute and store in x the gauss-newton direction, if the
        // jacobian is rank-deficient, obtain a least squares solution
        for (int j = 0; j < rank; ++j) {
            lmDir[permutation[j]] = qy[j];
        }
        for (int j = rank; j < nC; ++j) {
            lmDir[permutation[j]] = 0;
        }
        for (int k = rank - 1; k >= 0; --k) {
            int pk = permutation[k];
            double ypk = lmDir[pk] / diagR[pk];
            for (int i = 0; i < k; ++i) {
                lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk];
            }
            lmDir[pk] = ypk;
        }

        // evaluate the function at the origin, and test
        // for acceptance of the Gauss-Newton direction
        double dxNorm = 0;
        for (int j = 0; j < solvedCols; ++j) {
            int pj = permutation[j];
            double s = diag[pj] * lmDir[pj];
            work1[pj] = s;
            dxNorm += s * s;
        }
        dxNorm = FastMath.sqrt(dxNorm);
        double fp = dxNorm - delta;
        if (fp <= 0.1 * delta) {
            lmPar = 0;
            return;
        }

        // if the jacobian is not rank deficient, the Newton step provides
        // a lower bound, parl, for the zero of the function,
        // otherwise set this bound to zero
        double sum2;
        double parl = 0;
        if (rank == solvedCols) {
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                work1[pj] *= diag[pj] / dxNorm;
            }
            sum2 = 0;
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                double sum = 0;
                for (int i = 0; i < j; ++i) {
                    sum += weightedJacobian[i][pj] * work1[permutation[i]];
                }
                double s = (work1[pj] - sum) / diagR[pj];
                work1[pj] = s;
                sum2 += s * s;
            }
            parl = fp / (delta * sum2);
        }

        // calculate an upper bound, paru, for the zero of the function
        sum2 = 0;
        for (int j = 0; j < solvedCols; ++j) {
            int pj = permutation[j];
            double sum = 0;
            for (int i = 0; i <= j; ++i) {
                sum += weightedJacobian[i][pj] * qy[i];
            }
            sum /= diag[pj];
            sum2 += sum * sum;
        }
        double gNorm = FastMath.sqrt(sum2);
        double paru = gNorm / delta;
        if (paru == 0) {
            // 2.2251e-308 is the smallest positive real for IEEE754
            paru = 2.2251e-308 / FastMath.min(delta, 0.1);
        }

        // if the input par lies outside of the interval (parl,paru),
        // set par to the closer endpoint
        lmPar = FastMath.min(paru, FastMath.max(lmPar, parl));
        if (lmPar == 0) {
            lmPar = gNorm / dxNorm;
        }

        for (int countdown = 10; countdown >= 0; --countdown) {

            // evaluate the function at the current value of lmPar
            if (lmPar == 0) {
                lmPar = FastMath.max(2.2251e-308, 0.001 * paru);
            }
            double sPar = FastMath.sqrt(lmPar);
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                work1[pj] = sPar * diag[pj];
            }
            determineLMDirection(qy, work1, work2, work3);

            dxNorm = 0;
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                double s = diag[pj] * lmDir[pj];
                work3[pj] = s;
                dxNorm += s * s;
            }
            dxNorm = FastMath.sqrt(dxNorm);
            double previousFP = fp;
            fp = dxNorm - delta;

            // if the function is small enough, accept the current value
            // of lmPar, also test for the exceptional cases where parl is zero
            if ((FastMath.abs(fp) <= 0.1 * delta) ||
                ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) {
                return;
            }

            // compute the Newton correction
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                work1[pj] = work3[pj] * diag[pj] / dxNorm;
            }
            for (int j = 0; j < solvedCols; ++j) {
                int pj = permutation[j];
                work1[pj] /= work2[j];
                double tmp = work1[pj];
                for (int i = j + 1; i < solvedCols; ++i) {
                    work1[permutation[i]] -= weightedJacobian[i][pj] * tmp;
                }
            }
            sum2 = 0;
            for (int j = 0; j < solvedCols; ++j) {
                double s = work1[permutation[j]];
                sum2 += s * s;
            }
            double correction = fp / (delta * sum2);

            // depending on the sign of the function, update parl or paru.
            if (fp > 0) {
                parl = FastMath.max(parl, lmPar);
            } else if (fp < 0) {
                paru = FastMath.min(paru, lmPar);
            }

            // compute an improved estimate for lmPar
            lmPar = FastMath.max(parl, lmPar + correction);
        }
    }
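    // Reference note (added comment, not part of the original source): the loop above is
    // the standard lmpar root-finding scheme. It seeks lmPar >= 0 such that the scaled
    // step x(lmPar) returned by determineLMDirection satisfies
    //   | |diag.x(lmPar)| - delta | <= 0.1 * delta,
    // keeping a bracket [parl, paru] on the root and applying a small, bounded number of
    // safeguarded Newton corrections to lmPar.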
    /**
     * Solve a*x = b and d*x = 0 in the least squares sense.
     * <p>This implementation is a translation in Java of the MINPACK
     * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a>
     * routine.</p>
     * <p>This method sets the lmDir and lmDiag attributes.</p>
     * <p>The authors of the original fortran function are:</p>
     * <ul>
     *  <li>Argonne National Laboratory. MINPACK project. March 1980</li>
     *  <li>Burton S. Garbow</li>
     *  <li>Kenneth E. Hillstrom</li>
     *  <li>Jorge J. More</li>
     * </ul>
     * <p>Luc Maisonobe did the Java translation.</p>
     *
     * @param qy array containing qTy
     * @param diag diagonal matrix
     * @param lmDiag diagonal elements associated with lmDir
     * @param work work array
     */
    private void determineLMDirection(double[] qy, double[] diag,
                                      double[] lmDiag, double[] work) {

        // copy R and Qty to preserve input and initialize s
        // in particular, save the diagonal elements of R in lmDir
        for (int j = 0; j < solvedCols; ++j) {
            int pj = permutation[j];
            for (int i = j + 1; i < solvedCols; ++i) {
                weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]];
            }
            lmDir[j] = diagR[pj];
            work[j]  = qy[j];
        }

        // eliminate the diagonal matrix d using a Givens rotation
        for (int j = 0; j < solvedCols; ++j) {

            // prepare the row of d to be eliminated, locating the
            // diagonal element using p from the Q.R. factorization
            int pj = permutation[j];
            double dpj = diag[pj];
            if (dpj != 0) {
                Arrays.fill(lmDiag, j + 1, lmDiag.length, 0);
            }
            lmDiag[j] = dpj;

            // the transformations to eliminate the row of d
            // modify only a single element of Qty
            // beyond the first n, which is initially zero.
            double qtbpj = 0;
            for (int k = j; k < solvedCols; ++k) {
                int pk = permutation[k];

                // determine a Givens rotation which eliminates the
                // appropriate element in the current row of d
                if (lmDiag[k] != 0) {

                    final double sin;
                    final double cos;
                    double rkk = weightedJacobian[k][pk];
                    if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) {
                        final double cotan = rkk / lmDiag[k];
                        sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan);
                        cos = sin * cotan;
                    } else {
                        final double tan = lmDiag[k] / rkk;
                        cos = 1.0 / FastMath.sqrt(1.0 + tan * tan);
                        sin = cos * tan;
                    }

                    // compute the modified diagonal element of R and
                    // the modified element of (Qty,0)
                    weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k];
                    final double temp = cos * work[k] + sin * qtbpj;
                    qtbpj = -sin * work[k] + cos * qtbpj;
                    work[k] = temp;

                    // accumulate the transformation in the row of s
                    for (int i = k + 1; i < solvedCols; ++i) {
                        double rik = weightedJacobian[i][pk];
                        final double temp2 = cos * rik + sin * lmDiag[i];
                        lmDiag[i] = -sin * rik + cos * lmDiag[i];
                        weightedJacobian[i][pk] = temp2;
                    }
                }
            }

            // store the diagonal element of s and restore
            // the corresponding diagonal element of R
            lmDiag[j] = weightedJacobian[j][permutation[j]];
            weightedJacobian[j][permutation[j]] = lmDir[j];
        }

        // solve the triangular system for z, if the system is
        // singular, then obtain a least squares solution
        int nSing = solvedCols;
        for (int j = 0; j < solvedCols; ++j) {
            if ((lmDiag[j] == 0) && (nSing == solvedCols)) {
                nSing = j;
            }
            if (nSing < solvedCols) {
                work[j] = 0;
            }
        }
        if (nSing > 0) {
            for (int j = nSing - 1; j >= 0; --j) {
                int pj = permutation[j];
                double sum = 0;
                for (int i = j + 1; i < nSing; ++i) {
                    sum += weightedJacobian[i][pj] * work[i];
                }
                work[j] = (work[j] - sum) / lmDiag[j];
            }
        }

        // permute the components of z back to components of lmDir
        for (int j = 0; j < lmDir.length; ++j) {
            lmDir[permutation[j]] = work[j];
        }
    }
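    // Reference note (added comment, not part of the original source): in matrix terms
    // the method above solves, in the least squares sense, the augmented system
    //   [ R ]       [ Q^T.y ]
    //   [ D ] . z ~ [   0   ]
    // where R comes from the Q.R. decomposition and D is the diagonal passed as "diag"
    // (sqrt(lmPar) times the scaling diagonal at the call site in determineLMParameter),
    // eliminating the rows of D with Givens rotations and then back-substituting.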
    /**
     * Decompose a matrix A as A.P = Q.R using Householder transforms.
     * <p>As suggested in the P. Lascaux and R. Theodor book
     * <i>Analyse num&eacute;rique matricielle appliqu&eacute;e &agrave;
     * l'art de l'ing&eacute;nieur</i> (Masson, 1986), instead of representing
     * the Householder transforms with u<sub>k</sub> unit vectors such that:
     * <pre>
     * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup>
     * </pre>
     * we use non-unit vectors v<sub>k</sub> such that:
     * <pre>
     * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup>
     * </pre>
     * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>.
     * The beta<sub>k</sub> coefficients are provided upon exit as recomputing
     * them from the v<sub>k</sub> vectors would be costly.</p>
     * <p>This decomposition handles rank deficient cases since the transformations
     * are performed in non-increasing column norms order thanks to column
     * pivoting. The diagonal elements of the R matrix are therefore also in
     * non-increasing absolute values order.</p>
     *
     * @param jacobian Weighted Jacobian matrix at the current point.
     * @exception ConvergenceException if the decomposition cannot be performed
     */
    private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException {
        // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J),
        // hence the multiplication by -1.
        weightedJacobian = jacobian.scalarMultiply(-1).getData();

        final int nR = weightedJacobian.length;
        final int nC = weightedJacobian[0].length;

        // initializations
        for (int k = 0; k < nC; ++k) {
            permutation[k] = k;
            double norm2 = 0;
            for (int i = 0; i < nR; ++i) {
                double akk = weightedJacobian[i][k];
                norm2 += akk * akk;
            }
            jacNorm[k] = FastMath.sqrt(norm2);
        }

        // transform the matrix column after column
        for (int k = 0; k < nC; ++k) {

            // select the column with the greatest norm on active components
            int nextColumn = -1;
            double ak2 = Double.NEGATIVE_INFINITY;
            for (int i = k; i < nC; ++i) {
                double norm2 = 0;
                for (int j = k; j < nR; ++j) {
                    double aki = weightedJacobian[j][permutation[i]];
                    norm2 += aki * aki;
                }
                if (Double.isInfinite(norm2) || Double.isNaN(norm2)) {
                    throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN,
                                                   nR, nC);
                }
                if (norm2 > ak2) {
                    nextColumn = i;
                    ak2 = norm2;
                }
            }
            if (ak2 <= qrRankingThreshold) {
                rank = k;
                return;
            }
            int pk = permutation[nextColumn];
            permutation[nextColumn] = permutation[k];
            permutation[k] = pk;

            // choose alpha such that Hk.u = alpha ek
            double akk = weightedJacobian[k][pk];
            double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2);
            double betak = 1.0 / (ak2 - akk * alpha);
            beta[pk] = betak;

            // transform the current column
            diagR[pk] = alpha;
            weightedJacobian[k][pk] -= alpha;

            // transform the remaining columns
            for (int dk = nC - 1 - k; dk > 0; --dk) {
                double gamma = 0;
                for (int j = k; j < nR; ++j) {
                    gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]];
                }
                gamma *= betak;
                for (int j = k; j < nR; ++j) {
                    weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk];
                }
            }
        }
        rank = solvedCols;
    }
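    // Reference note (added comment, not part of the original source): after this method
    // returns, the factorization is stored implicitly: diagR[pk] holds alpha_k (the
    // diagonal of R), beta[pk] holds beta_k, and rows k..nR-1 of the permuted column pk
    // of weightedJacobian hold the Householder vector v_k. Q is never formed explicitly;
    // it is applied through these vectors, as qTy below does for Q^T.y.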
    /**
     * Compute the product Qt.y for some Q.R. decomposition.
     *
     * @param y vector to multiply (will be overwritten with the result)
     */
    private void qTy(double[] y) {
        final int nR = weightedJacobian.length;
        final int nC = weightedJacobian[0].length;

        for (int k = 0; k < nC; ++k) {
            int pk = permutation[k];
            double gamma = 0;
            for (int i = k; i < nR; ++i) {
                gamma += weightedJacobian[i][pk] * y[i];
            }
            gamma *= beta[pk];
            for (int i = k; i < nR; ++i) {
                y[i] -= gamma * weightedJacobian[i][pk];
            }
        }
    }
}