/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.regression;
import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.MathArrays;
/**
* This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
*
* <p>The algorithm is described in: <pre>
* Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
* Author(s): Alan J. Miller
* Source: Journal of the Royal Statistical Society.
* Series C (Applied Statistics), Vol. 41, No. 2
* (1992), pp. 458-478
* Published by: Blackwell Publishing for the Royal Statistical Society
* Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
*
* <p>This method for multiple regression forms the solution to the OLS problem
* by updating the QR decomposition as described by Gentleman.</p>
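 *
 * <p>Usage sketch (hypothetical data; two regressors plus the automatically
 * added constant):</p>
 * <pre>{@code
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[]{1.0, 2.0}, 3.0);
 * regression.addObservation(new double[]{2.0, 5.0}, 9.0);
 * regression.addObservation(new double[]{3.0, 7.0}, 13.0);
 * regression.addObservation(new double[]{4.0, 9.0}, 17.0);
 * RegressionResults results = regression.regress();
 * }</pre>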
*
* @since 3.0
*/
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
    /** number of variables in regression */
private final int nvars;
    /** diagonals of cross products matrix */
private final double[] d;
    /** the elements of the R'Y */
private final double[] rhs;
    /** the off diagonal portion of the R matrix */
private final double[] r;
    /** the tolerance for each of the variables */
private final double[] tol;
    /** residual sum of squares for all nested regressions */
private final double[] rss;
    /** order of the regressors */
private final int[] vorder;
    /** scratch space for tolerance calc */
private final double[] work_tolset;
    /** number of observations entered */
private long nobs = 0;
    /** sum of squared errors of largest regression */
private double sserr = 0.0;
    /** has rss been called? */
private boolean rss_set = false;
    /** has the tolerance setting method been called */
private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
private final boolean[] lindep;
    /** singular x values */
private final double[] x_sing;
    /** workspace for singularity method */
private final double[] work_sing;
    /** summation of Y variable */
private double sumy = 0.0;
    /** summation of squared Y values */
private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
private boolean hasIntercept;
    /** zero tolerance */
private final double epsilon;
/**
* Set the default constructor to private access
* to prevent inadvertent instantiation
*/
@SuppressWarnings("unused")
private MillerUpdatingRegression() {
this(-1, false, Double.NaN);
}
/**
* This is the augmented constructor for the MillerUpdatingRegression class.
*
* @param numberOfVariables number of regressors to expect, not including constant
* @param includeConstant include a constant automatically
* @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
*/
public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
throws ModelSpecificationException {
if (numberOfVariables < 1) {
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (includeConstant) {
this.nvars = numberOfVariables + 1;
} else {
this.nvars = numberOfVariables;
}
this.hasIntercept = includeConstant;
this.nobs = 0;
this.d = new double[this.nvars];
this.rhs = new double[this.nvars];
this.r = new double[this.nvars * (this.nvars - 1) / 2];
this.tol = new double[this.nvars];
this.rss = new double[this.nvars];
this.vorder = new int[this.nvars];
this.x_sing = new double[this.nvars];
this.work_sing = new double[this.nvars];
this.work_tolset = new double[this.nvars];
this.lindep = new boolean[this.nvars];
for (int i = 0; i < this.nvars; i++) {
vorder[i] = i;
}
if (errorTolerance > 0) {
this.epsilon = errorTolerance;
} else {
this.epsilon = -errorTolerance;
}
}
/**
* Primary constructor for the MillerUpdatingRegression.
*
* @param numberOfVariables maximum number of potential regressors
* @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
*/
public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
throws ModelSpecificationException {
this(numberOfVariables, includeConstant, Precision.EPSILON);
}
/**
     * Returns true if a constant (intercept) term is included in the regression.
     * @return true if the regression has an intercept, false otherwise
*/
public boolean hasIntercept() {
return this.hasIntercept;
}
/**
* Gets the number of observations added to the regression model.
* @return number of observations
*/
public long getN() {
return this.nobs;
}
/**
* Adds an observation to the regression model.
* @param x the array with regressor values
* @param y the value of dependent variable given these regressors
     * @throws ModelSpecificationException if the length of {@code x} does not equal
* the number of independent variables in the model
*/
public void addObservation(final double[] x, final double y)
throws ModelSpecificationException {
if ((!this.hasIntercept && x.length != nvars) ||
(this.hasIntercept && x.length + 1 != nvars)) {
throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
x.length, nvars);
}
if (!this.hasIntercept) {
include(MathArrays.copyOf(x, x.length), 1.0, y);
} else {
final double[] tmp = new double[x.length + 1];
System.arraycopy(x, 0, tmp, 1, x.length);
tmp[0] = 1.0;
include(tmp, 1.0, y);
}
++nobs;
}
/**
* Adds multiple observations to the model.
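     *
     * <p>A minimal usage sketch (hypothetical data):</p>
     * <pre>{@code
     * double[][] x = {{1.0, 2.0}, {2.0, 5.0}, {3.0, 7.0}, {4.0, 9.0}};
     * double[] y = {4.0, 9.0, 13.0, 17.0};
     * regression.addObservations(x, y);
     * }</pre>
     *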
* @param x observations on the regressors
* @param y observations on the regressand
* @throws ModelSpecificationException if {@code x} is not rectangular, does not match
* the length of {@code y} or does not contain sufficient data to estimate the model
*/
public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
if ((x == null) || (y == null) || (x.length != y.length)) {
throw new ModelSpecificationException(
LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
(x == null) ? 0 : x.length,
(y == null) ? 0 : y.length);
}
if (x.length == 0) { // Must be no y data either
throw new ModelSpecificationException(
LocalizedFormats.NO_DATA);
}
if (x[0].length + 1 > x.length) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
x.length, x[0].length);
}
for (int i = 0; i < x.length; i++) {
addObservation(x[i], y[i]);
}
}
/**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * According to the Miller paper, note that in the original implementation the x vector
     * is overwritten. In this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes a dense
     * design matrix; the original Gentleman algorithm retains some advantage for sparse
     * matrices.
*
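     * <p>A sketch of the scaled-rotation recurrences actually coded below (the general
     * case; zero pivots are special-cased in the code), for each regressor {@code i}
     * with row multiplier {@code d[i]} and current weight {@code w}:</p>
     * <pre>
     * d'[i]    = d[i] + w * x[i]^2
     * w'       = w * d[i] / d'[i]
     * r'[i][k] = (d[i] * r[i][k] + w * x[i] * x[k]) / d'[i]
     * x'[k]    = x[k] - x[i] * r[i][k]
     * </pre>
     * <p>with the same update applied to the regressand {@code y} in place of {@code x[k]}.</p>
     *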
* @param x observations on the regressors
     * @param wi weight of this observation (either -1 or 1)
* @param yi observation on the regressand
*/
private void include(final double[] x, final double wi, final double yi) {
int nextr = 0;
double w = wi;
double y = yi;
double xi;
double di;
double wxi;
double dpi;
double xk;
double _w;
this.rss_set = false;
sumy = smartAdd(yi, sumy);
sumsqy = smartAdd(sumsqy, yi * yi);
for (int i = 0; i < x.length; i++) {
if (w == 0.0) {
return;
}
xi = x[i];
if (xi == 0.0) {
nextr += nvars - i - 1;
continue;
}
di = d[i];
wxi = w * xi;
_w = w;
if (di != 0.0) {
dpi = smartAdd(di, wxi * xi);
final double tmp = wxi * xi / di;
if (FastMath.abs(tmp) > Precision.EPSILON) {
w = (di * w) / dpi;
}
} else {
dpi = wxi * xi;
w = 0.0;
}
d[i] = dpi;
for (int k = i + 1; k < nvars; k++) {
xk = x[k];
x[k] = smartAdd(xk, -xi * r[nextr]);
if (di != 0.0) {
r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
} else {
r[nextr] = xk / xi;
}
++nextr;
}
xk = y;
y = smartAdd(xk, -xi * rhs[i]);
if (di != 0.0) {
rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
} else {
rhs[i] = xk / xi;
}
}
sserr = smartAdd(sserr, w * y * y);
}
/**
     * Adds two numbers, a and b, such that the contamination due to the
     * numerical smallness of one addend does not corrupt the sum.
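     * <p>For example, {@code smartAdd(1.0, 1.0e-17)} returns exactly {@code 1.0}:
     * the smaller addend falls below {@code 1.0 * Precision.EPSILON} and is treated
     * as numerical noise rather than added.</p>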
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
*/
private double smartAdd(double a, double b) {
final double _a = FastMath.abs(a);
final double _b = FastMath.abs(b);
if (_a > _b) {
final double eps = _a * Precision.EPSILON;
if (_b > eps) {
return a + b;
}
return a;
} else {
final double eps = _b * Precision.EPSILON;
if (_a > eps) {
return a + b;
}
return b;
}
}
/**
* As the name suggests, clear wipes the internals and reorders everything in the
* canonical order.
*/
public void clear() {
Arrays.fill(this.d, 0.0);
Arrays.fill(this.rhs, 0.0);
Arrays.fill(this.r, 0.0);
Arrays.fill(this.tol, 0.0);
Arrays.fill(this.rss, 0.0);
Arrays.fill(this.work_tolset, 0.0);
Arrays.fill(this.work_sing, 0.0);
Arrays.fill(this.x_sing, 0.0);
Arrays.fill(this.lindep, false);
for (int i = 0; i < nvars; i++) {
this.vorder[i] = i;
}
this.nobs = 0;
this.sserr = 0.0;
this.sumy = 0.0;
this.sumsqy = 0.0;
this.rss_set = false;
this.tol_set = false;
}
/**
* This sets up tolerances for singularity testing.
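     *
     * <p>In effect (a restatement of the loop below), the tolerance for column
     * {@code j} is {@code eps} times the sum of {@code sqrt(d[j])} and, over every
     * row {@code i} above the diagonal, {@code abs(r[i][j]) * sqrt(d[i])}.</p>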
*/
private void tolset() {
int pos;
double total;
final double eps = this.epsilon;
for (int i = 0; i < nvars; i++) {
this.work_tolset[i] = FastMath.sqrt(d[i]);
}
tol[0] = eps * this.work_tolset[0];
for (int col = 1; col < nvars; col++) {
pos = col - 1;
total = work_tolset[col];
for (int row = 0; row < col; row++) {
total += FastMath.abs(r[pos]) * work_tolset[row];
pos += nvars - row - 2;
}
tol[col] = eps * total;
}
tol_set = true;
}
/**
* The regcf method conducts the linear regression and extracts the
* parameter vector. Notice that the algorithm can do subset regression
* with no alteration.
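     * <p>Concretely (a restatement of the loop below), the coefficients are recovered
     * by back-substitution through the stored unit upper-triangular factor:</p>
     * <pre>
     * beta[i] = rhs[i] - (sum over j &gt; i of r[i][j] * beta[j])
     * </pre>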
*
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the estimated slope coefficients
* @throws ModelSpecificationException if {@code nreq} is less than 1
* or greater than the number of independent variables
*/
private double[] regcf(int nreq) throws ModelSpecificationException {
int nextr;
if (nreq < 1) {
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (nreq > this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
}
if (!this.tol_set) {
tolset();
}
final double[] ret = new double[nreq];
boolean rankProblem = false;
for (int i = nreq - 1; i > -1; i--) {
if (FastMath.sqrt(d[i]) < tol[i]) {
ret[i] = 0.0;
d[i] = 0.0;
rankProblem = true;
} else {
ret[i] = rhs[i];
nextr = i * (nvars + nvars - i - 1) / 2;
for (int j = i + 1; j < nreq; j++) {
ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
++nextr;
}
}
}
if (rankProblem) {
for (int i = 0; i < nreq; i++) {
if (this.lindep[i]) {
ret[i] = Double.NaN;
}
}
}
return ret;
}
/**
* The method which checks for singularities and then eliminates the offending
* columns.
*/
private void singcheck() {
int pos;
for (int i = 0; i < nvars; i++) {
work_sing[i] = FastMath.sqrt(d[i]);
}
for (int col = 0; col < nvars; col++) {
// Set elements within R to zero if they are less than tol(col) in
// absolute value after being scaled by the square root of their row
// multiplier
final double temp = tol[col];
pos = col - 1;
for (int row = 0; row < col - 1; row++) {
if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
r[pos] = 0.0;
}
pos += nvars - row - 2;
}
// If diagonal element is near zero, set it to zero, set appropriate
// element of LINDEP, and use INCLUD to augment the projections in
// the lower rows of the orthogonalization.
lindep[col] = false;
if (work_sing[col] < temp) {
lindep[col] = true;
if (col < nvars - 1) {
Arrays.fill(x_sing, 0.0);
int _pi = col * (nvars + nvars - col - 1) / 2;
for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
x_sing[_xi] = r[_pi];
r[_pi] = 0.0;
}
final double y = rhs[col];
final double weight = d[col];
d[col] = 0.0;
rhs[col] = 0.0;
this.include(x_sing, weight, y);
} else {
sserr += d[col] * rhs[col] * rhs[col];
}
}
}
}
/**
* Calculates the sum of squared errors for the full regression
* and all subsets in the following manner: <pre>
* rss[] ={
* ResidualSumOfSquares_allNvars,
* ResidualSumOfSquares_FirstNvars-1,
* ResidualSumOfSquares_FirstNvars-2,
* ..., ResidualSumOfSquares_FirstVariable} </pre>
*/
private void ss() {
double total = sserr;
rss[nvars - 1] = sserr;
for (int i = nvars - 1; i > 0; i--) {
total += d[i] * rhs[i] * rhs[i];
rss[i - 1] = total;
}
rss_set = true;
}
/**
* Calculates the cov matrix assuming only the first nreq variables are
* included in the calculation. The returned array contains a symmetric
* matrix stored in lower triangular form. The matrix will have
* ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
* cov =
* {
* cov_00,
* cov_10, cov_11,
 * cov_20, cov_21, cov_22,
* ...
* } </pre>
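 *
 * <p>In matrix terms, this is a sketch of what the code below computes, with
 * {@code sigma^2 = rss[nreq - 1] / (nobs - rank)}:</p>
 * <pre>
 * cov(beta) = sigma^2 * inverse(R) * inverse(D) * transpose(inverse(R))
 * </pre>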
*
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the variance covariance of the included
* regressors in lower triangular form
*/
private double[] cov(int nreq) {
if (this.nobs <= nreq) {
return null;
}
double rnk = 0.0;
for (int i = 0; i < nreq; i++) {
if (!this.lindep[i]) {
rnk += 1.0;
}
}
final double var = rss[nreq - 1] / (nobs - rnk);
final double[] rinv = new double[nreq * (nreq - 1) / 2];
inverse(rinv, nreq);
final double[] covmat = new double[nreq * (nreq + 1) / 2];
Arrays.fill(covmat, Double.NaN);
int pos2;
int pos1;
int start = 0;
double total = 0;
for (int row = 0; row < nreq; row++) {
pos2 = start;
if (!this.lindep[row]) {
for (int col = row; col < nreq; col++) {
if (!this.lindep[col]) {
pos1 = start + col - row;
if (row == col) {
total = 1.0 / d[col];
} else {
total = rinv[pos1 - 1] / d[col];
}
for (int k = col + 1; k < nreq; k++) {
if (!this.lindep[k]) {
total += rinv[pos1] * rinv[pos2] / d[k];
}
++pos1;
++pos2;
}
covmat[ (col + 1) * col / 2 + row] = total * var;
} else {
pos2 += nreq - col - 1;
}
}
}
start += nreq - row - 1;
}
return covmat;
}
/**
* This internal method calculates the inverse of the upper-triangular portion
* of the R matrix.
* @param rinv the storage for the inverse of r
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
*/
private void inverse(double[] rinv, int nreq) {
int pos = nreq * (nreq - 1) / 2 - 1;
int pos1 = -1;
int pos2 = -1;
double total = 0.0;
Arrays.fill(rinv, Double.NaN);
for (int row = nreq - 1; row > 0; --row) {
if (!this.lindep[row]) {
final int start = (row - 1) * (nvars + nvars - row) / 2;
for (int col = nreq; col > row; --col) {
pos1 = start;
pos2 = pos;
total = 0.0;
for (int k = row; k < col - 1; k++) {
pos2 += nreq - k - 1;
if (!this.lindep[k]) {
total += -r[pos1] * rinv[pos2];
}
++pos1;
}
rinv[pos] = total - r[pos1];
--pos;
}
} else {
pos -= nreq - row;
}
}
}
/**
 * In the original algorithm only the partial correlations of the regressors
 * are returned to the user. In this implementation, we have <pre>
* corr =
* {
* corrxx - lower triangular
* corrxy - bottom row of the matrix
* }
* Replaces subroutines PCORR and COR of:
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
*
* <p>Calculate partial correlations after the variables in rows
* 1, 2, ..., IN have been forced into the regression.
* If IN = 1, and the first row of R represents a constant in the
* model, then the usual simple correlations are returned.</p>
*
* <p>If IN = 0, the value returned in array CORMAT for the correlation
* of variables Xi & Xj is: <pre>
* sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
*
* <p>On return, array CORMAT contains the upper triangle of the matrix of
* partial correlations stored by rows, excluding the 1's on the diagonal.
* e.g. if IN = 2, the consecutive elements returned are:
* (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
* Array YCORR stores the partial correlations with the Y-variable
* starting with YCORR(IN+1) = partial correlation with the variable in
* position (IN+1). </p>
*
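 * <p>Usage sketch (hypothetical; with a model that includes an intercept in
 * position 0, {@code in = 1} yields the simple correlations):</p>
 * <pre>{@code
 * double[] corr = regression.getPartialCorrelations(1);
 * }</pre>
 *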
* @param in how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the partial correlations of the remainder of
* regressors with each other and the regressand, in lower triangular form
*/
    public double[] getPartialCorrelations(int in) {
        // Validate the request before sizing any work arrays from it; a negative or
        // too-large value would otherwise cause a negative-array-size exception.
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
final int nvm = nvars - 1;
final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
if (d[in] > 0.0) {
rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
}
for (int col = in + 1; col < nvars; col++) {
pos = base_pos + col - 1 - in;
sumxx = d[col];
for (int row = in; row < col; row++) {
sumxx += d[row] * r[pos] * r[pos];
pos += nvars - row - 2;
}
if (sumxx > 0.0) {
rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
} else {
rms[col + rms_off] = 0.0;
}
}
sumyy = sserr;
for (int row = in; row < nvars; row++) {
sumyy += d[row] * rhs[row] * rhs[row];
}
if (sumyy > 0.0) {
sumyy = 1.0 / FastMath.sqrt(sumyy);
}
pos = 0;
for (int col1 = in; col1 < nvars; col1++) {
sumxy = 0.0;
Arrays.fill(work, 0.0);
pos1 = base_pos + col1 - in - 1;
for (int row = in; row < col1; row++) {
pos2 = pos1 + 1;
for (int col2 = col1 + 1; col2 < nvars; col2++) {
work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
pos2++;
}
sumxy += d[row] * r[pos1] * rhs[row];
pos1 += nvars - row - 2;
}
pos2 = pos1 + 1;
for (int col2 = col1 + 1; col2 < nvars; col2++) {
work[col2 + wrk_off] += d[col1] * r[pos2];
++pos2;
output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
++pos;
}
sumxy += d[col1] * rhs[col1];
output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
}
return output;
}
/**
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
* Move variable from position FROM to position TO in an
* orthogonal reduction produced by AS75.1.
*
* @param from initial position
* @param to destination
*/
private void vmove(int from, int to) {
double d1;
double d2;
double X;
double d1new;
double d2new;
double cbar;
double sbar;
double Y;
int first;
int inc;
int m1;
int m2;
int mp1;
int pos;
boolean bSkipTo40 = false;
if (from == to) {
return;
}
if (!this.rss_set) {
ss();
}
int count = 0;
if (from < to) {
first = from;
inc = 1;
count = to - from;
} else {
first = from - 1;
inc = -1;
count = from - to;
}
int m = first;
int idx = 0;
        while (idx < count) {
            bSkipTo40 = false; // reset the skip flag for each position moved
m1 = m * (nvars + nvars - m - 1) / 2;
m2 = m1 + nvars - m - 1;
mp1 = m + 1;
d1 = d[m];
d2 = d[mp1];
// Special cases.
if (d1 > this.epsilon || d2 > this.epsilon) {
X = r[m1];
if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
X = 0.0;
}
if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
d[m] = d2;
d[mp1] = d1;
r[m1] = 0.0;
for (int col = m + 2; col < nvars; col++) {
++m1;
X = r[m1];
r[m1] = r[m2];
r[m2] = X;
++m2;
}
X = rhs[m];
rhs[m] = rhs[mp1];
rhs[mp1] = X;
bSkipTo40 = true;
//break;
} else if (d2 < this.epsilon) {
d[m] = d1 * X * X;
r[m1] = 1.0 / X;
for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
r[_i] /= X;
}
rhs[m] /= X;
bSkipTo40 = true;
//break;
}
if (!bSkipTo40) {
d1new = d2 + d1 * X * X;
cbar = d2 / d1new;
sbar = X * d1 / d1new;
d2new = d1 * cbar;
d[m] = d1new;
d[mp1] = d2new;
r[m1] = sbar;
for (int col = m + 2; col < nvars; col++) {
++m1;
Y = r[m1];
r[m1] = cbar * r[m2] + sbar * Y;
r[m2] = Y - X * r[m2];
++m2;
}
Y = rhs[m];
rhs[m] = cbar * rhs[mp1] + sbar * Y;
rhs[mp1] = Y - X * rhs[mp1];
}
}
if (m > 0) {
pos = m;
for (int row = 0; row < m; row++) {
X = r[pos];
r[pos] = r[pos - 1];
r[pos - 1] = X;
pos += nvars - row - 2;
}
}
// Adjust variable order (VORDER), the tolerances (TOL) and
// the vector of residual sums of squares (RSS).
m1 = vorder[m];
vorder[m] = vorder[mp1];
vorder[mp1] = m1;
X = tol[m];
tol[m] = tol[mp1];
tol[mp1] = X;
rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
m += inc;
++idx;
}
}
/**
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
*
* <p> Re-order the variables in an orthogonal reduction produced by
* AS75.1 so that the N variables in LIST start at position POS1,
* though will not necessarily be in the same order as in LIST.
* Any variables in VORDER before position POS1 are not moved.
* Auxiliary routine called: VMOVE. </p>
*
* <p>This internal method reorders the regressors.</p>
*
* @param list the regressors to move
* @param pos1 where the list will be placed
* @return -1 error, 0 everything ok
*/
private int reorderRegressors(int[] list, int pos1) {
int next;
int i;
int l;
if (list.length < 1 || list.length > nvars + 1 - pos1) {
return -1;
}
next = pos1;
i = pos1;
while (i < nvars) {
l = vorder[i];
for (int j = 0; j < list.length; j++) {
if (l == list[j] && i > next) {
this.vmove(i, next);
++next;
if (next >= list.length + pos1) {
return 0;
} else {
break;
}
}
}
++i;
}
return 0;
}
/**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
*
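     * <p>Sketch of the computation below: forward-solve {@code transpose(R) * w = x}
     * (R is unit upper triangular, stored by rows), then accumulate
     * {@code h = sum of w[j]^2 / d[j]}; this equals the usual leverage
     * {@code x' * inverse(X'X) * x} since {@code X'X = R' * D * R}.</p>
     *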
     * @param row_data the regressor values for a single observation
     * @return the diagonal element of the hat matrix for this observation
*/
public double getDiagonalOfHatMatrix(double[] row_data) {
double[] wk = new double[this.nvars];
int pos;
double total;
if (row_data.length > nvars) {
return Double.NaN;
}
double[] xrow;
if (this.hasIntercept) {
xrow = new double[row_data.length + 1];
xrow[0] = 1.0;
System.arraycopy(row_data, 0, xrow, 1, row_data.length);
} else {
xrow = row_data;
}
double hii = 0.0;
for (int col = 0; col < xrow.length; col++) {
if (FastMath.sqrt(d[col]) < tol[col]) {
wk[col] = 0.0;
} else {
pos = col - 1;
total = xrow[col];
for (int row = 0; row < col; row++) {
total = smartAdd(total, -wk[row] * r[pos]);
pos += nvars - row - 2;
}
wk[col] = total;
hii = smartAdd(hii, (total * total) / d[col]);
}
}
return hii;
}
/**
     * Gets the order of the regressors, useful if some type of reordering
     * has occurred. Calling {@code regress(int[])} will trigger a reordering.
*
* @return int[] with the current order of the regressors
*/
public int[] getOrderOfRegressors(){
return MathArrays.copyOf(vorder);
}
/**
* Conducts a regression on the data in the model, using all regressors.
*
* @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables
*/
public RegressionResults regress() throws ModelSpecificationException {
return regress(this.nvars);
}
/**
* Conducts a regression on the data in the model, using a subset of regressors.
*
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables or the number of regressors requested
     * is greater than the number of regressors in the model
*/
public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
if (this.nobs <= numberOfRegressors) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, numberOfRegressors);
}
if( numberOfRegressors > this.nvars ){
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
}
tolset();
singcheck();
double[] beta = this.regcf(numberOfRegressors);
ss();
double[] cov = this.cov(numberOfRegressors);
int rnk = 0;
for (int i = 0; i < this.lindep.length; i++) {
if (!this.lindep[i]) {
++rnk;
}
}
boolean needsReorder = false;
for (int i = 0; i < numberOfRegressors; i++) {
if (this.vorder[i] != i) {
needsReorder = true;
break;
}
}
if (!needsReorder) {
return new RegressionResults(
beta, new double[][]{cov}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
} else {
double[] betaNew = new double[beta.length];
double[] covNew = new double[cov.length];
int[] newIndices = new int[beta.length];
for (int i = 0; i < nvars; i++) {
for (int j = 0; j < numberOfRegressors; j++) {
if (this.vorder[j] == i) {
betaNew[i] = beta[ j];
newIndices[i] = j;
}
}
}
int idx1 = 0;
int idx2;
int _i;
int _j;
for (int i = 0; i < beta.length; i++) {
_i = newIndices[i];
for (int j = 0; j <= i; j++, idx1++) {
_j = newIndices[j];
if (_i > _j) {
idx2 = _i * (_i + 1) / 2 + _j;
} else {
idx2 = _j * (_j + 1) / 2 + _i;
}
covNew[idx1] = cov[idx2];
}
}
return new RegressionResults(
betaNew, new double[][]{covNew}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
}
}
/**
     * Conducts a regression on the data in the model, using the regressors in the
     * given array. Calling this method will change the internal order of the
     * regressors, and care is required in interpreting the hat matrix.
*
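     * <p>Usage sketch (hypothetical; indices refer to variables in canonical order,
     * where position 0 is the constant when an intercept is present):</p>
     * <pre>{@code
     * // fit a subset model using only variables 0 and 2
     * RegressionResults subset = regression.regress(new int[]{0, 2});
     * }</pre>
     *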
* @param variablesToInclude array of variables to include in regression
* @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the number of regressors in the model, or a regressor index
     * in the regressor array does not exist
*/
public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
if (variablesToInclude.length > this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
}
if (this.nobs <= this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, this.nvars);
}
Arrays.sort(variablesToInclude);
int iExclude = 0;
for (int i = 0; i < variablesToInclude.length; i++) {
            // reject regressor indices that do not exist in the model
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX,
                        variablesToInclude[i], this.nvars);
            }
if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
variablesToInclude[i] = -1;
++iExclude;
}
}
int[] series;
if (iExclude > 0) {
int j = 0;
series = new int[variablesToInclude.length - iExclude];
for (int i = 0; i < variablesToInclude.length; i++) {
if (variablesToInclude[i] > -1) {
series[j] = variablesToInclude[i];
++j;
}
}
} else {
series = variablesToInclude;
}
reorderRegressors(series, 0);
tolset();
singcheck();
double[] beta = this.regcf(series.length);
ss();
double[] cov = this.cov(series.length);
int rnk = 0;
for (int i = 0; i < this.lindep.length; i++) {
if (!this.lindep[i]) {
++rnk;
}
}
boolean needsReorder = false;
for (int i = 0; i < this.nvars; i++) {
if (this.vorder[i] != series[i]) {
needsReorder = true;
break;
}
}
if (!needsReorder) {
return new RegressionResults(
beta, new double[][]{cov}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
} else {
double[] betaNew = new double[beta.length];
int[] newIndices = new int[beta.length];
for (int i = 0; i < series.length; i++) {
for (int j = 0; j < this.vorder.length; j++) {
if (this.vorder[j] == series[i]) {
betaNew[i] = beta[ j];
newIndices[i] = j;
}
}
}
double[] covNew = new double[cov.length];
int idx1 = 0;
int idx2;
int _i;
int _j;
for (int i = 0; i < beta.length; i++) {
_i = newIndices[i];
for (int j = 0; j <= i; j++, idx1++) {
_j = newIndices[j];
if (_i > _j) {
idx2 = _i * (_i + 1) / 2 + _j;
} else {
idx2 = _j * (_j + 1) / 2 + _i;
}
covNew[idx1] = cov[idx2];
}
}
return new RegressionResults(
betaNew, new double[][]{covNew}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
}
}
}