/* Copyright (C) 1991-2000 Simon N. Wood snw@st-and.ac.uk This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. (www.gnu.org/copyleft/gpl.html) You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/ /* Routines for basic matrix manipulation creation, destruction and file i/o for matrices. See end of file for update log */ #include #include #include #include #include #include "matrix.h" #define RANGECHECK #define PAD 1L matrix null_mat; /* matrix for passing when you don't actually need to */ #define PADCON (-1.234565433647588392902028934e270) /* counter for memory used */ long memused=0L,matrallocd=0L; /* the routines */ struct mrec { matrix mat; struct mrec *fp,*bp; }; typedef struct mrec MREC; void ErrorMessage(char *msg,int fatal); matrix null_mat; MREC *top,*bottom; matrix initmat(rows,cols) long rows,cols; /* Don't alter this without altering freemat() as well !! */ { matrix A;long i,j,pad; #ifdef RANGECHECK pad=PAD; #else pad=0L; #endif A.vec=0; /* if (rows*cols==0L) { A.original_r=A.r=rows;A.original_c=A.c=cols;A.mem=0L;A.vec=0; A.M=(double **)NULL;A.V=(double *)NULL;return(A); } */ A.M=(double **)calloc((size_t)rows+2*pad,sizeof(double *)); if (((cols==1L)||(rows==1L))&&((cols*(long)rows+2*pad)*(long)sizeof(double)<=65535)) { if (A.M) A.M[0]=(double *)calloc((size_t)cols*rows+2*pad,sizeof(double)); for (i=1L;i0L)) { ErrorMessage("Failed to initialize memory for matrix.",1);} if (pad) /* This lot is debugging code that checks out matrix errors on allocation and release */ { if (A.vec) { A.V=A.M[0];for (i=0;imat=top->mat=A;top->bp=bottom;bottom->fp=top; } else /* expanding the linked list by one */ { top->fp=(MREC *)calloc(1,sizeof(MREC)); top->fp->mat=A;top->fp->bp=top;top=top->fp; /* crystal clear, no? */ } } A.V=A.M[0];/* This allows vectors to be accessed using A.V[i] */ return(A); } matrix initvec(rows) long rows; { return(initmat(1L,rows));} void freemat(A) matrix A; { long i,j,pad;int ok=1; MREC *delet; #ifdef RANGECHECK pad=PAD; #else pad=0L; #endif /* if (A.original_r*A.original_c!=0L) */ { if (pad) { if (A.vec) { for (i=-pad;i<0;i++) if ((A.V[i]!=PADCON)||(A.V[i+A.original_r*A.original_c+pad]!=PADCON)) ok=0; } else { for (i=-pad;imat.M!=A.M)) { i++;delet=delet->fp;} if (i==matrallocd) { ErrorMessage("INTEGRITY PROBLEM in the extant matrix list.",1); } else { if (i) delet->bp->fp=delet->fp; else bottom=delet->fp; if (i!=matrallocd-1) delet->fp->bp=delet->bp; else top=delet->bp; free(delet); } /* repositioning pointers so that what was allocated gets freed */ if (!A.vec) for (i=0;imat; if (A.vec) { for (i=-pad;i<0;i++) if ((A.V[i]!=PADCON)||(A.V[i+A.original_r*A.original_c+pad]!=PADCON)) ok=0; } else { for (i=-pad;ifp; } } long fsafewrite(ptr,size,n,stream) double *ptr;size_t size;long n;FILE *stream; { long i,j,k=0L; for (i=0;i<(n/32000L);i++) k+=fwrite(ptr+i*32000L,size,(size_t)32000L,stream); j=n%32000L; k+=fwrite(ptr+i*32000L,size,(size_t)j,stream); return(k); } long fsaferead(ptr,size,n,stream) double *ptr;size_t size;long n;FILE *stream; { long i,j=32000L,k=0L; for (i=0;i<(n/32000L);i++) k+=fread(ptr+i*32000L,size,(size_t)j,stream); j=n%32000L; k+=fread(ptr+i*32000L,size,(size_t)j,stream); return(k); } void dumpmat(M,filename) matrix M;char *filename; { FILE *out; long i,j=0L; out=fopen(filename,"wb"); j+=fwrite(&(M.r),sizeof(long),1,out);j+=fwrite(&(M.c),sizeof(long),1,out); for (i=0;ir;k++) { i=fsaferead((*M).M[k],sizeof(double),M->c,in); if (i!=M->c) { sprintf(str,"\n problem with %s !!",filename);ErrorMessage(str,1);} } fclose(in); } matrix vecmult(A,x,t) matrix A,x;int t; /* Optimised multiplication of x by A. where x is a column vector and A a matrix Result returns Ax and FREES x BEFORE RETURNING - so usual use is x=vecmult(A,x,t) */ { matrix y; double *yV,*xV,**AM,*a; long Ar,Ac,i,j; if (t) y.r=A.c; else y.r=A.r; y=initmat(y.r,1L);yV=y.V;AM=A.M;Ar=A.r;Ac=A.c; if (t) { for (i=0;ir;br=b->r; AM=A->M;bV=b->V;cV=c->V; if (t) /* then A transposed */ for (i=0;ir>B->r||A->c>B->c) ErrorMessage("Target matrix too small in mcopy",1); BM=B->M;Ac=A->c; for (AM=A->M;AMM+A->r;AM++) { pB= *BM; for (pA= *AM;pA< *AM+Ac; pA++) *(pB++) = *pA; BM++; } } void matmult(C,A,B,tA,tB) matrix C,A,B;int tA,tB; /* Puts A*B in C. A will be transposed in this calculation if tA is not zero. B will be transposed if tB is not zero */ { long i,j,k; double temp,*p,*p1,*p2,**CM,**AM,**BM; AM=A.M;BM=B.M;CM=C.M; /* Saves address calculation involved in C.M */ if (tA) { if (tB) { if ((A.r!=B.c)||(A.c!=C.r)||(B.r!=C.c)) { ErrorMessage("Incompatible matrices in matmult.",1);} for (i=0;i2) temp0=initmat(r,c);else temp0=C; matmult(temp0,M[0],M[1],t[0],t[1]); for (i=1;i2) { matmult(C,temp0,M[n-1],0,t[n-1]); freemat(temp0); } free(t); free(M); va_end(argptr); } void mad(C,A,B,mA,mB) matrix C,A,B;double mA,mB; /* forms sum C=A+B. address optimization added 27/10/94 */ { long i; double *pA,*pB,*pC; if (C.vec) { pB=B.V;pA=A.V; for (pC=C.V;pCr!=A->c) ErrorMessage("Attempt to invert() non-square matrix",1); c=(int *)calloc((size_t)A->c,sizeof(int)); /* index of columns, used for column pivoting */ d=(int *)calloc((size_t)A->c,sizeof(int)); rp=(int *)calloc((size_t)A->c,sizeof(int)); /* row changes */ cp=(int *)calloc((size_t)A->c,sizeof(int)); /* row changes */ for (i=0;ic;i++) { c[i]=i;d[i]=i;} AM=A->M; /* saving adress calculations*/ for (j=0;jc;j++) /* loop through columns to be reduced */ { max=0.0; for (i=j;ir;i++) /* loop through rows to search for pivot */ { p=AM[i]; for (k=j;kc;k++) /* loop through cols to search for pivot */ { x=p[c[k]];if (fabs(x)>max) { max=fabs(x);pr=i;pc=k;}} } /* now move pivot to element j,j */ p=AM[j];AM[j]=AM[pr];AM[pr]=p; /* rows exchanged */ k=c[j];c[j]=c[pc];c[pc]=k; /* columns exchanged */ rp[j]=pr; /* stores row pivoted with */ cp[j]=pc; /* stores column pivoted with */ cj=c[j]; // save time /* Now reduce the column */ x=AM[j][cj]; if (x==0.0) ErrorMessage("Singular Matrix passed to invert()",1); for (p=AM[j];pc;p++) *p/=x; /* divide row j by pivot element */ AM[j][cj]=1.0/x; for (i=0;ir;i++) /* work down rows eliminating column j */ { p=AM[i];p1=AM[j]; if (i!=j) { x = -p[cj]; /* multiplier for this row */ for (k=0;kc;k++) /* cols of A */ { ck=c[k];p[ck]+=x*p1[ck];} } } } for (i=A->r-1;i>=0;i--) //work down through column re-ordering { if (cp[i]!=i) { p=AM[i];AM[i]=AM[cp[i]];AM[cp[i]]=p; // row exchange } } for (j=0;jc-1;j++) // implement column exchange if (c[j]!=j) { if (c[j]r;i++) { p=AM[i];x=p[j];p[j]=p[k];p[k]=x;} d[k]=d[j];d[j]=c[j]; c[d[k]]=k; } for (i=A->r-1;i>=0;i--) // column exchange implied by row re-ordering if (rp[i]!=i) { for (k=0;kr;k++) { p=AM[k];x=p[i];p[i]=p[rp[i]];p[rp[i]]=x;} // column exchange } free(c);free(rp);free(cp);free(d); } double triTrInvLL(matrix *l0,matrix *l1) /* this routine finds the trace of inv(LL') where L is a bi-diagonal lower triangular choleski factor, with leading diagonal l0 and leading sub diagonal l1. */ { double trC,x,z,*l1V,*l0V; long i,l0r; l0V=l0->V;l1V=l1->V;l0r=l0->r; x=l0V[l0r-1];z=1.0/(x*x);trC=z; for (i=l0r-2;i>=0;i--) { x=l1V[i];z=(1.0+x*x*z);x=l0V[i]; z/=x*x;trC+=z; } return(trC); } void bicholeskisolve(matrix *A,matrix *B,matrix *l0,matrix *l1) /* solves LL'A=B where L is a matrix with leading diagonal l0 and leading sub-diagonal l1. The algorithm is O(n^2) and hence much better than performing the same calculation by inversion of L (since inverse of L is fully lower triangular, rather than banded) */ { long i,j; double *pA1,*pA,*pB,lv0,lv1; /* solve LC=B (C actually stored in A) */ pA=A->M[0];pB=B->M[0];lv0=l0->V[0]; for (i=0;ic;i++) pA[i]=pB[i]/lv0; for (i=1;ir;i++) { lv0=l0->V[i];lv1=l1->V[i-1];pA1=pA;pA=A->M[i];pB=B->M[i]; for (j=0;jc;j++) pA[j]=(pB[j]-lv1*pA1[j])/lv0; } /* now solve L'A=C */ pA=A->M[A->r-1]; lv0=l0->V[l0->r-1]; for (i=0;ic;i++) pA[i]/=lv0; for (i=A->r-2;i>=0;i--) { pA1=pA;pA=A->M[i]; lv0=l0->V[i];lv1=l1->V[i]; for (j=0;jc;j++) pA[j]=(pA[j]-lv1*pA1[j])/lv0; } } void tricholeski(matrix *T,matrix *l0,matrix *l1) /* Routine for choleski decomposition of the tridiagonal matrix T. If L is the matrix with leading diagonal in vector l0 and leading subdiagonal in l1, then T=LL'. The routine is O(n). Note that not only is multiplication of a matrix by L 0(n^2), but formation of A=inv(L)B can be done in O(n^2) by solution of LA=B. Note also that the trace of the inverse of a tri-diagonal matrix can be found cheaply using Elden's trick - see p137 of Wahba. 20/11/99: Now has steps to deal with +ve semi definite matrices by a slight modification of the choleski decomposition. The modification zeros elements of the decomposition, as needed. The zeroing steps are valid for +ve semi-definite (non-negative definite) matrices (only), for the following reasons: 0. a +ve semi definite matrix that is not +ve definite, must have zero eigenvalues as is easily seen by spectral decomposition. 1. A properly tri-diagonal symmetric rank m (sub)matrix has m or m+1 non-zero rows and columns. This can be seen by counting up the number of possible independent rows and columns of an m+1 by m+1 properly tri-diagonal matrix. 2. So a +ve semi-definite properly tridiagonal (sub) matrix has at most 1 non-independent row/col, i.e. at most one zero eigenvalue. 3. When present, this non-independence leads to a zero on the final element of the leading diagonal of the choleski decomposition of a properly tridiagonal +ve semi-definite (sub)matrix (I thought I had a proof of this, but am now not sure!). 4. Zeroing outside properly tridiagonal sections of the matrix is clearly ok. Note that these arguments only hold for +ve semi-definite tri-diagonal matrices. Merely being symmetric won't do, and neither will merely being +ve semi-definite! NOTE: that this routine will not detect none +ve semi-definite matrices, the only way to do that is to check whether the decomposition actually works (it won't for a matrix that isn't +ve semi-definite) */ { double **TM,*l1V,*l0V,z=1.0; long k1,k,tr1; TM=T->M;l0V=l0->V;l1V=l1->V; l0V[0]=sqrt(TM[0][0]);tr1=T->r-1; for (k=1;kr;k++) { k1=k-1; if (z>0.0) l1V[k1]=TM[k][k1]/l0V[k1]; // no problem else l1V[k1]=0.0; // assume TM[k][k1]=0, so no problem z=l1V[k1];z=TM[k][k]-z*z; if (z>0.0) l0V[k]=sqrt(z); else l0V[k]=0.0; } } void choleski(A,L,invert,invout) matrix A,L;int invert,invout; /* version that halts execution on failure */ { if (!chol(A,L,invert,invout)) ErrorMessage("Not a +ve def. matrix in choleski().",1); } int chol(A,L,invert,invout) matrix A,L;int invert,invout; /* Routine to perform inversion and Choleski decomposition A=LL' where L is */ /* LOWER triangular (zero elements are to the top right).Inversion is */ /* performed only if invert is not zero. If invout is non-zero then A will */ /* be returned containing the inverse of A passed to the routine, otherwise */ /* L will contain the inverse of choleski decomposition (so that inverse of */ /* A is L'L) but A will be unchanged. If invert is zero invout is ignored. */ /* returns 1 for a +ve def. matrix: 0 otherwise.... */ { long i,j,k; double sigmaL,*p,*p1,temp,**AM,**LM,**invM,z; matrix inv; AM=A.M;LM=L.M; /* A.M replaced by AM and L.M replaced by LM to save address calc. */ for (i=0;i-1.0) si=alpha/(1.0+sqrt(1.0+alpha*sj)); else si=alpha; for (j=0;j0.0) rj2=sqrt(rj2); else rj2=2e-15; si*=(1.0+rj2)/(rj2*(tj+rj2)); for (i=j+1;i=0;i--) { zsum=0.0; for (j=(i+1);j2) r=cov(s,t); else r=0.0; return(r); } double dot(a,b) matrix a,b; { long i,k=0L;double c=0.0,*p,*p1; if (a.vec) { p1=b.V;for (p=a.V;pM; for (i=0;ir;i++) tr+=p[i][i]; return(tr); } double absdev(a) matrix a; /* gets mean absolute deviation from mean */ { int i; double m,d=0.0,*aV; aV=a.V; m=mean(a); for (i=0;im) m=y; } else for (i=0;im) m=y;}// m=max(m,fabs(*p)); if (!m) return(0.0); if (d.vec) for (p=d.V;pmaxsum) maxsum=sum; } return(maxsum); } void householder(u,a,b,t1) matrix *u,a,b;long t1; /* transforms a to b, iff they are of equal Euclidian length. u is the (t1+1) vector such that the full post multiplying householder matrix is H' = [ I - vv' ] where v' = [u',0] where 0 is a vector of zeroes. */ { long i;double v,*aV,*bV,*uV; aV=a.V;bV=b.V;uV=u->V; u->r=t1+1; for (i=0;ir;i++) uV[i]=aV[i]-bV[i]; v=enorm((*u))/sqrt(2.0); for (i=0;ir;i++) uV[i]/=v; } void Hmult(C,u) matrix C,u; /* This routine is for post multiplication by Housholder matrices only */ { double temp,*p,*p1,*uV,**CuM,**CM; long i,j; matrix Cu; Cu=initmat(C.r,u.c); uV=u.V;CuM=Cu.M;CM=C.M; for (i=0;i CQ; p==0,t==1 => CQ'; p==1,t==0 => QC; p==1,t==1 => Q'C Tested for premultiplication by untransposed matrix (p==1, t==0) other combinations untested 15/8/96 (speed up awesome!) NOTE that the routine expects C to be compatible with the Hi's - if this routine is being used for projection in and out of Null spaces, then make sure that C is appropriately packed with zeroes. If appropriate zero packing conventions have been used then OrthMult() is more efficient.... */ { double *u,*CuV,**CM; matrix Cu; long i,j,k; if (p) Cu=initmat(C.c,1L);else Cu=initmat(C.r,1L); CuV=Cu.V;CM=C.M; if (p) { if (t) { for (k=0;k=0;k--) /* loop through the householder matrices */ { u=U.M[k]; for (i=0;i=0;k--) /* loop through the householder matrices */ { u=U.M[k]; for (i=0;i0) { for (i=0;im) m=x;} // scale factor if (m) for (j=0;jr; for (i=n-1;i>=0;i--) { Lii=1.0/R->M[i][i]; for (j=n-1;j>i;j--) { s=0.0; for (k=i+1;k<=j;k++) s+=R->M[i][k]*R->M[k][j]; R->M[i][j] = -s/R->M[i][i]; } R->M[i][i]=Lii; } } void OrthoMult(matrix *Q,matrix *A,int off,int rows,int t,int pre,int o_pre) /* The first `rows' of Q are vectors for a householder transformation. The ith vector starts with i+off zero elements (i starts at 0). The vectors are stored in the order that they should be applied. If o_pre==1 then they originally pre-multiplied, otherwise they originally post multiplied. if t==1 then the transform is transposed. If pre==1 then it is applied from the left. Each householder transform has the same form: (I-uu') (treating u as a column vector). The transformation is applied to A. */ { double au,*u,*a,**AtM,**AM,**QM; long i,j,k,Ar,Qc,kk; matrix At; if (o_pre) t=1-t; /* default assumption is that creation was for post mult. */ if (pre) /* use fact that QA=(A'Q')' and Q'A=(A'Q)' */ { At=initmat(A->c,A->r); AM=A->M;AtM=At.M; for (i=0;ir;i++) for (j=0;jc;j++) AtM[j][i]=AM[i][j]; t=1-t; } else At=*A; AM=At.M;QM=Q->M;Ar=At.r;Qc=Q->c; for (kk=0;kkM; for (i=0;iV;yV=y->V; for (i=R->r-1;i>=0;i--) { RMi=R->M[i]; x=0.0;for (j=i+1;jr;j++) x+=RMi[j]*pV[j]; pV[i]=(yV[i]-x)/RMi[i]; } } int QR(matrix *Q,matrix *R) /* Does a QR factorisation of the matrix supplied in R. In Q the householder vectors are supplied to perform the transformation QR(in) -> R(out) R(out) is upper triangular (elements are 0 below leading diagonal). If Q->r is none zero then the vectors u are stored in successive rows of Q. The u vectors make up Q as a series of (stable) householder transformations. (I-uu'). The transformations are to be applied from the left in row order. The first i elements of the ith u are zero (i starting at zero). If A is the matrix input in R then QA=R, so that A=Q'R. Q can be used with OrthoMult(). Under/overflow avoidance added 13/1/2000 along with more efficient calculation of length of u (modifications tested). */ { long i,j,k,n,Rr; double *u,t,z,**RM,*p,m; RM=R->M;Rr=R->r; if (Rrc) n=Rr; else n=R->c; u=(double *)calloc((size_t)Rr,sizeof(double)); for (k=0;km) m=z;} if (m) for (i=k;i0.0) t = -sqrt(t);else t= sqrt(t); /* value of new RM[k][k] (stable) */ for (i=k+1;ic;j++) { t=0.0;for (i=k;ir) /* store vectors u for making Q */ { p=Q->M[k]; for (i=k;iM; if (col) for (k=0;kr;k++) { t=MM[k][i];MM[k][i]=MM[k][j];MM[k][j]=t;} else for (k=0;kc;k++) { t=MM[i][k];MM[i][k]=MM[j][k];MM[j][k]=t;} } void UTU(matrix *T,matrix *U) /* does orthogonal tridiagonalisation of the symmetric matrix supplied in T; U is returned with successive householder vectors in successive rows of U. The first i+1 elements of the ith row will be zero (i starts at 0). There are only T->r - 2 non-zero rows. The transformations must be applied in order from the right. Recall that a householder transformation to take a->b is constructed as follows: u=a-b; u=u/sqrt(u'u/2); H=(I-uu'); then Ha=b and a'H=b' (of course to form Ha... form u'a; form u(u'a); form a-u(u'a); never form H first!). If A is the input matrix then U'AU=T => A=UTU' Underflow and overflow protection added 13/1/2000, and lt efficiency improved. Improved code tested with variety of random matrices. (Householder rotations are stable.) OrthoMult() works with U storage convention used here. */ { long i,j,k; double *u,*t,lt,x,m; for (i=0;ir-2;i++) { u=U->M[i];t=T->M[i];lt=0.0; m=0.0;for (j=i+1;jc;j++) { x=fabs(t[j]); if (mc;j++) t[j]/=m; // avoid over/underflow for (j=i+1;jc;j++) lt+=t[j]*t[j]; if (t[i+1]>0.0) lt= -sqrt(lt);else lt=sqrt(lt); /* ensures stability (by maximising element i+1 of u) */ x=t[i+1]; // stored for altering lt efficiently u[i+1]=lt-t[i+1];T->M[i+1][i]=t[i+1]=lt*m; lt*=lt;lt+= -x*x+u[i+1]*u[i+1]; for (j=i+2;jc;j++) { u[j]= -t[j];T->M[j][i]=t[j]=0.0;} if (lt>0.0) /* only do this if u is non-zero */ { lt=sqrt(0.5*lt); for (j=i+1;jc;j++) u[j]/=lt; } for (j=i+1;jc;j++) /* apply rotations to remaining rows */ { t=T->M[j];lt=0.0; for (k=i+1;kc;k++) lt+=u[k]*t[k]; for (k=i+1;kc;k++) t[k] -= u[k]*lt; } /* Apply rotations from the left */ for (j=i+1;jc;j++) { lt=0.0; for (k=i+1;kc;k++) lt+=u[k]*T->M[k][j]; for (k=i+1;kc;k++) T->M[k][j] -= u[k]*lt; } } } void root(matrix *M,matrix *C,double tol) /* produces a square root of non-negative definite M, by obtaining M=UTU', then getting the choleski decomposition of the non-zero part of T. T=LL' so C=UL and M=CC'.... C will have as few columns as possible. C is initialised in this routine. Upgraded 20/11/99: Previous version assumed that zero rows and columns only occured at the end of T. This is no longer the case. tricholeski() has been modified to deal with non-negative definite T (although this may still be weakest link). If L contains a column of zeros then this column is ommitted from C altogether. zero is judged relative to tol multiplied by the maximum element on the leading diagonal of L if tol>0, otherwise zero is any number that leads to no change in the maximum element when added to it. Algorithm appears to be substantially better than svdroot() (and should be much quicker). Commented out code is the old code, left in in case of future difficulties: when it is removed some further tidying could be done. 16/1/2000: tested with variety of random rank deficient matrices, and theoretical basis re-checked more carefully - see tricholeski() comments. */ { matrix T,U,u0,u1; long i,j,k,rows; int fswap=0,ok; double max,uc,*u,x,m; T=initmat(M->r,M->c); U=initmat(M->r,M->c); for (i=0;iM[i][j]; UTU(&T,&U); // make absolutely symmetric for (i=0;imax) max=fabs(T.M[i][i]); ok=1;x=u0.V[0]*u0.V[0]-T.M[0][0];m=0.0; if (x>m) m=x; for (i=1;im) { m=x;k=i;} x=u1.V[i-1]*u1.V[i-1]+u0.V[i]*u0.V[i]-T.M[i][i];x=fabs(x); if (x>m) { m=x;k=i;} } if (m>1e-14*max) ok=0; if (!ok) { ErrorMessage("Using svd to find root of penalty!",0); (*C)=svdroot(*M,tol); freemat(U);freemat(T);freemat(u0);freemat(u1); return; } freemat(T); T=initmat(U.r,u0.r); /* now apply householder rotations from the left to obtain C=UL */ for (i=0;i=0;i--) { u=U.M[i]; /* first i+1 elements of u are zero */ for (j=0;jtol*max)||(fabs(T.M[i][j])>tol*max)) {ok=1;break;}} if (ok) // then include this column { for (i=0;ir;i++) C->M[i][k]=T.M[i][j]; k++; } } C->c=k; if (fswap) { interchange(C,1L,0L,0);} freemat(T);freemat(U);freemat(u0);freemat(u1); } void bidiag(matrix *A,matrix *wl,matrix *ws,matrix *V) /* This routine bi-diagonalizes A using Housholder rotations applied from left and right. wl and ws are vectors of dimension A.c and A.c-1 respectively. V is an orthogonal A.c by A.c matrix. Let W be the matrix with wl as leading diagonal and ws as leading superdiagonal, then: A = UWV' where U is orthogonal and output in A. The routine's main use is as the first stage in a singular value decomposition of A. The Orthogonal matrices are composed of Householder rotations applied left, right, left, right, left, right etc. The left rotations (reflectors) zero elements of A in columns below the leading diagonal starting from the left. The right reflectors zero elements of A in rows left of the first super diagonal starting from the top. Reflectors are calculated as described on p149 of Watkins (1991) Fundamentals of Matrix Computations, Wiley (but note that \gamma<-1/\sigma x_1 should read \gamma<-(\sigma x_1 )!!). Reflectors are of the form H=(I-g*uu'), where u is a vector and g a scalar. This routine has been tested on a variety of random matrices of differing dimensions as well as on strictly singular matrices. The tests checked that A = UWV'. Most important address optimization has been performed. 9/1/2000 */ { double m,s,g,temp,**AM,**VM,*p,*p1; int i,j,k,nv,nu; nv=0; // counts up number of v_i's AM=A->M;VM=V->M; for (i=0;ic;i++) { wl->V[i]=0.0;if (ic-1) ws->V[i]=0.0; if (ir) /* zero subdiagonal column i */ { m=0.0;for (j=i;jr;j++) { s=fabs(AM[j][i]); if (s>m) m=s;} // max of column for scaling if (m==0.0) g=0.0; else /* work out reflector elements */ { s=0.0;for (j=i;jr;j++) { AM[j][i]/=m;s+=AM[j][i]*AM[j][i];} // scale reflector etc. s=sqrt(s); if (AM[i][i]<0.0) s = -s; // avoid cancellation error AM[i][i]+=s; g=1/(AM[i][i]*s); s*=m; } // Now u is stored in rows i to A.r of column i of A wl->V[i] = -s; VM[i][i] = g; // temporary storage for g, for use in later assembly of U // must apply reflector to remaining columns for (j=i+1;jc;j++) { s=0.0;for (k=i;kr;k++) s+=AM[k][i]*AM[k][j]; s*=g;for (k=i;kr;k++) AM[k][j] += -s*AM[k][i]; } } /* zero elements i+2 to A.c of row i ..... */ if ((ir) && (ic-1)) { m=0.0;//for (j=i+1;jc;j++) { s=fabs(AM[i][j]); if (s>m) m=s;} // max for scaling for (p=AM[i]+i+1;pc;p++) { s=fabs(*p);if (s>m) m=s;} // max for scaling if (m==0.0) g=0.0; else { s=0.0;//for (j=i+1;jc;j++) { AM[i][j]/=m;s+=AM[i][j]*AM[i][j];} for (p=AM[i]+i+1;pc;p++) { *p/=m;s+=(*p)*(*p);} s=sqrt(s); if (AM[i][i+1]<0.0) s = -s; // avoid cancellation error AM[i][i+1] += s; g=1.0/(AM[i][i+1]*s); s*=m; } ws->V[i] = -s; VM[i][i+1]=g; // temporary storage // Now apply reflector to remaining rows for (j=i+1;jr;j++) { s=0.0;//for (k=i+1;kc;k++) s+=AM[i][k]*AM[j][k]; p1=AM[j]+i+1;for (p=AM[i]+i+1;pc;p++) { s+=(*p)*(*p1);p1++;} s*=g;//for (k=i+1;kc;k++) AM[j][k] += -s*AM[i][k]; p1=AM[j]+i+1;for (p=AM[i]+i+1;pc;p++) { *p1 += -s*(*p);p1++;} } nv++; // number of v_i's } } /* At this point wl and ws are complete, but U and V are stored in A as reflector vectors, with associated g values stored on the leading diagonal and leading superdiagonal of V. Now form U and V. U is the first A.c columns of U_1 U_2 U_3 .... i.e. U_1 U_2 U_3 ... [I,0]'(same dim as A) where U_i = (I - g_i u_i u_i'): g_i is in V->M[i][i] and u_i is zero up to element i, with the remaining elements stored in rows i to A.r of column i of A. V= V_1 V_2 V_3 ... where V_i = (I-d_i v_i v_i') where d_i=V->M[i][i+1]. v_i is zero until element i+1, with remaining elements stored in columns i+1 to A.c of row i of A. */ /* first form V in order to free up space in A */ // initialize rows nv to A->c of V nu=A->c; if (A->rr; // number of U_i's for (i=nv+1;ic;i++) for (p=VM[i];pc;p++) *p=0.0; for (i=A->c-1;i>nv;i--) { if (i=0;i--) // working down through the V_i's { temp=VM[i+1][i+1]; // for (j=0;jc;j++) VM[i+1][j]=0.0; for (p=VM[i+1];pc;p++) *p=0.0; VM[i+1][i+1]=1.0; // initialize row of V for (j=A->c-1;j>i;j--) // columns affected by V_i { s=0.0;p=AM[i]+i+1;for (k=i+1;kc;k++) { s+=VM[k][j]*(*p);p++;} s*=VM[i][i+1]; p=AM[i]+i+1;for (k=i+1;kc;k++) { VM[k][j] += -s*(*p);p++;} } AM[i][i+1]=temp; // temporary storage for g_i's } /* Now all but first row and column of V are formed, but V->M[0][0] still contains g_0, while g_i is in AM[i-1][i] otherwise, so form U now and then finish off V */ for (i=nu-1;i>=0;i--) // work down through the u_i's { if (i>0) g=AM[i-1][i]; else g=VM[0][0]; for (j=0;jc-1;j>i;j--) // columns above i are affected { s=0.0;for (k=i;kr;k++) s+= AM[k][i]*AM[k][j]; s*=g; for (k=i;kr;k++) AM[k][j] += -s*AM[k][i]; } // as is column i itself.... for (j=A->r-1;j>i;j--) AM[j][i]*= -g*AM[i][i]; AM[i][i] = 1 - g*AM[i][i]*AM[i][i]; } // now finish off V p=VM[0];for (i=0;ic;i++) { *p=VM[i][0]=0.0;p++;} VM[0][0]=1.0; } svdcheck(matrix *U,matrix *w,matrix *ws,matrix *wl,matrix *V) /* this is a debugging routine for checking that svd() has not messed up the decomposition, by checking that the bidiagonal decomposition is still ok. */ { matrix W,A; int i; W=initmat(w->r,w->r); for (i=0;ir-1;i++) { W.M[i][i]=w->V[i]; W.M[i][i+1]=ws->V[i]; W.M[i+1][i]=wl->V[i]; } W.M[i][i]=w->V[i]; A=initmat(U->r,U->c); multi(3,A,*U,W,*V,0,0,1); printmat(W," %7.3g"); printmat(A," %7.3g"); freemat(A);freemat(W); getc(stdin); } void svd(matrix *A, matrix *w, matrix *V) /* This routine produces a singular value decomposition of A. On exit V will be an A.c by A.c orthogonal matrix, w will be a vector of A.c singular values and A will contain U of the same dimension as A such that U'U=I. If W is the diagonal matrix with w as its leading diagonal then: A=UWV' - the singluar value decomposition. This routine is based on: Watkins (1991) Fundamentals of Matrix Computations, Wiley. (see section 7.2) The algorithm has 2 steps: 1. Bi-diagonalise A using reflectors (Householder transformations) from left and right - this is achieved by routine bidiag(), above. 2. Find singular values of Bi-diagonal matrix. Because the bi-diagonal matrix may not always be properly bidiagonal step 2, is a little complicated.... i) Deflate the problem if possible, which may involve zeroing an element of the super-diagonal. ii) Check whether (deflated) bi-diagonal matrix can be partioned, if so find start of final partition (again may need to zero an element on the super diagonal) iii) Apply iteration of implicit QR algorithm (p405 Watkins) to the sub matrix identified above. iv) Return to (i) assuming that there are singular values left to find. Note that the Givens Rotators used here are calculated as follows to avoid rounding problems: assume xj to be zeroed into xi: m = max(fabs(xi),fabs(xj)); xi/=m;xj/=m r=sqrt(xi*xi+xj*xj); c=xi/r;s=xj/r; xi=m*r;xj=0.0; (c and s can obviously be applied to other vectors without needing to know m.) See page 271 of Watkins, for suggestion of how to test for zeroes. Most important address optimization has been done (stopped when it was as efficient as Numerical Recipes code). (Commented out code is pre-optimization, left in for readability) Tested against NR routine for a variety of random matrices and matrices that are rank deficient in various ways. Check for m>0.0 added 15/5/00 - otherwise division by zero is possible, leading to failure of routine! */ { double wnorm=0.0,x,y,s,c,m,r,a,b,sig,**VM,**UM,*wV,*wsV,*p1,*p2; matrix ws,*U; int finished=0,end,start,i,j,k,maxreps=100; ws=initmat(w->r-1,1L); bidiag(A,w,&ws,V); // bi-diagonalize A.... U=A; VM=V->M;UM=U->M;wV=w->V;wsV=ws.V; for (i=0;ir-1; while (!finished) { for (k=0;k=0;i--) // work out sequence of rotations { m=fabs(y);x=fabs(wV[i]); if (x>m) m=x; x=wV[i]; if (m>0.0) { y/=m;x/=m; // now rotate y into x r=sqrt(y*y+x*x); c=x/r;s=y/r; } else {r=0.0;c=1.0;s=0.0;} wV[i]=r*m; // rotation zeros y (implicitly) if (i>0) // propagate the problem element! { y= -wsV[i-1]*s; wsV[i-1]*=c; } /* Need to update V as well V -> V G where G is the rotation just applied.... */ for (j=0;jr;j++) // work down the rows { p2=VM[j]+end;p1=VM[j]+i;x=*p1; //x=VM[j][i]; //VM[j][i]=c*x+s*VM[j][end]; *p1=c*x+s*(*p2); //VM[j][end]*=c;VM[j][end] += -s*x; *p2 *= c; *p2 += -s*x; } } } end--; // // Check here for termination ..... if (end<=0) finished=1; break; // successfully deflated, so start new QR iteration cycle or finish } else //if (fabs(wsV[end-1])<=1e-15*wnorm) // condition slacker than below if (wsV[end-1]+wnorm==wnorm) //too restrictive??/* wV[end] is a singular value => deflate */ { end--; if (end==0) finished=1; // all elements of ws are zeroed so we're done break; // deflated so start new QR cycle or finish } else // no deflation possible, search for start of sub-matrix { start=end-1; while ((wnorm+wV[start]!=wnorm)&&(wnorm+wsV[start]!=wnorm)&&(start>=0)) start--; start++; // this is now the row and column starting the sub-matrix if ((start>0)&&(wnorm+wV[start-1]==wnorm)&&(wnorm+wsV[start-1]!=wnorm)) { // ws.V[start-1] must be zeroed.... y=wsV[start-1];wsV[start-1]=0.0; for (i=start;i<=end;i++) // get sequence of rotators from left.... { m=fabs(y);x=fabs(wV[i]); if (x>m) m=x; x=wV[i]; if (m>0.0) { x/=m;y/=m; r=sqrt(x*x+y*y); c=x/r;s=y/r; } else {r=1.0;c=1.0;s=0.0;} wV[i]=r*m; // y zeroed implicitly if (ir;j++) // work down the rows { p1=UM[j]+start-1;x = *p1;p2=UM[j]+i;//x=UM[j][start-1]; // UM[j][start-1] = c*x-s*UM[j][i]; *p1 = c*x - s*(*p2); // UM[j][i]*=c; UM[j][i] += +s*x; *p2 *= c; *p2 += s*x; } } } } /* iterate QR algorithm on sub-matrix */ /* First find the Wilkinson shift which is given by the eigenvalue of the bottom right 2 by 2 submatrix, closest to the final matrix element. The required eigenvalues are found directly from the characteristic equation. See page 405 of Watkins.*/ a=wV[end-1]*wV[end-1]+wsV[end-1]*wsV[end-1];b=wV[end];b*=b; c=wV[end]*wsV[end-1]; y=sqrt((a-b)*(a-b)+4*c*c)/2; x=(a+b)/2+y;y=(a+b)/2-y; // x and y are the eigenvalues if (fabs(x-b)m) m=fabs(y); if (m>0.0) { y/=m;x/=m; // avoid over/underflow r=sqrt(y*y+x*x); c=x/r;s=y/r; // elements of rotator to apply from right operating in start,start+1 plane } else { r=1.0;c=1.0;s=0.0;} for (i=start;im) m=fabs(x); if (m>0.0) { x/=m;y/=m; // avoiding overflow r=sqrt(x*x+y*y); c=x/r;s=y/r; } else {r=1.0;c=1.0;s=0.0;} // rotator for zeroing y (at i-1,i+1) int x at (i-1,i) wsV[i-1]=r*m;y=0.0; } // now apply rotator from right to rows i and i+1.... x=wV[i]; wV[i]=c*x+s*wsV[i]; wsV[i]=c*wsV[i]-s*x; y=s*wV[i+1];wV[i+1]*=c; // y contains the bulge at (i+1,i) // and also apply from right to V.... for (j=0;jr;j++) // work down the rows { p1=VM[j]+i;x= *p1;p2=VM[j]+i+1; //x=VM[j][i]; //VM[j][i]=c*x+s*VM[j][i+1]; *p1=c*x + s* (*p2); //VM[j][i+1]*=c;VM[j][i+1] += -s*x; *p2 *= c; *p2 += -s*x; } /* Obtain rotator from left to zero element at (i+1,i) into element at (i,i) thereby creating new bulge at (i,i+2) */ x=wV[i]; m=fabs(y);if (fabs(x)>m) m = fabs(x); if (m>0.0) { x/=m;y/=m; // avoid overflow r=sqrt(x*x+y*y); c=x/r;s=y/r; } else {r=1.0;c=1.0;s=0.0;} // transform to zero y into x (i+1,i) into (i,i) wV[i]=r*m;y=0.0; // apply from left.... x=wsV[i]; wsV[i]=c*x+s*wV[i+1]; wV[i+1]=c*wV[i+1]-s*x; if (ir;j++) // work down the rows { p1=UM[j]+i;x= *p1;p2=UM[j]+i+1;//x=UM[j][i]; //UM[j][i]=c*x+s*UM[j][i+1]; *p1=c*x+s*(*p2); //UM[j][i+1]*=c; UM[j][i+1] += -s*x; *p2 *= c; *p2 += -s*x; } } } if (k==maxreps) ErrorMessage("svd() not converged",1); } freemat(ws); /* make all singular values non-negative */ for (i=0;ir;i++) if (wV[i]<0.0) { wV[i]= -wV[i]; for (j=0;jr;j++) VM[j][i]= -VM[j][i]; } } void symproduct(A,B,C,trace,chol) matrix A,B,C;int trace,chol; /* This routine aims to find the product C=ABA' as fast as possible, when B is symmetric. If 'trace' is non-zero then only the elements on the trace of C are returned. If 'chol' is non-zero then it is assumed that B contains L the choleski factor of B and is lower triangular (zeros at top right). If chol is 1 then it is assumed that the decomposition is LL' otherwise it is assumed to be L'L. The routine choleski returns LL' for the decomp of the matrix or L'L for the decomp of the inverse. */ { long i,j,k,Mr,Mc,Ac,Ar,Bc; double temp,*p,*p1,*p2,**AM,**BM,**CM,**MM; matrix M; AM=A.M;BM=B.M;CM=C.M;Ac=A.c;Ar=A.r;Bc=B.c; if (chol) { M=initmat(Ar,B.c); MM=M.M;Mr=M.r;Mc=M.c; if (chol==1) for (i=0;im) m=r; // scale factor if (m) { xi/=m;ti/=m;} // avoid over/underflow r=sqrt(xi*xi+ti*ti); if (r) { s=xi/r;c= -ti/r;} else {s=0.0;c=1.0;} /* T.M[i][T.r-i-1]=r;*/ for (j=i;j=0;i--) { e=0.0; for (j=i+1;jtol*x2) /* column sufficiently independent */ { for (k=0;ktol) tol=w.V[i];} tol*=reltol; for (i=0;itol) { for (j=0;j1.0) tol=1.0; maxsv=0.0;for (i=0;itol*maxsv) /* estimate rank and do truncation */ { sV[i]=1.0/sV[i];} else sV[i]=0.0; for (i=0;imax) max=fabs(c.V[i]);} max*=1e-14; for (i=0;imax) r++; freemat(b);freemat(c);freemat(d); return(r); } double condition(a) matrix a; /* finds the condition number of a as the ratio of the smallest to largest singular values - returns -ve for infinity */ { int i,j; double res,min,max; matrix b,c,d; b=initmat(a.r,a.c); for (i=0;imax) max=c.V[i];} if (!min) res=-1.0; else res=max/min; freemat(b);freemat(c);freemat(d); return(res); } void specd(U,W) matrix U,W; /* obtains the spectral decomposition of a symmetric matrix U: it is returned as U diag(W) U'. The routine uses svd, and then checks the signs of the eigenvalues for correctness - not exactly the most efficient method!! */ { matrix V; double dot; long i,j; V=initmat(U.r,U.r); svd(&U,&W,&V); for (j=0;j=1.0 then this is taken as the required rank of the pseudoinvese, otherwise its the tolerance used for truncation */ { matrix ws,B,w,C; long i,j,r=0L; double max; B=initmat(A->c,A->c);w=initmat(A->c,1L); svd(A,&w,&B); /* now original A is A diag(w) B' */ /* form pseudoinverse B diag(1/w) A' - but truncating small w_i's */ C=initmat(A->r,A->c); if (trunc<1.0) { max=0.0;for (i=0;imax) max=fabs(w.V[i]); max*=trunc; } else /* the rank of the required pinverse has been supplied */ { if (r-(long)floor(trunc)>0.5) r=(long)floor(trunc)+1;else r=(long)floor(trunc); ws=initmat(w.r,1L); for (i=0;ic;j++) if (fabs(w.V[j])>=max) { for (i=0;ir;i++) C.M[i][j]=A->M[i][j]/w.V[j]; r++; } if (A->r!=A->c) { freemat(*A); *A=initmat(B.r,C.r); } matmult(*A,B,C,0,1); freemat(w); freemat(C); freemat(B); return(r); } void suminvert(A,B,U,W) matrix A,B,U,W; /* If A and B are symmetric matrices then this routine returns U,W, where U is a square matrix and W a vector of eigenvalues. The inverse of A+cB where c is a constant can now be obtained from: U Diag(1.0/(W.V[i]+c)) U' NOTE: B must be +ve definite. */ { matrix L,M,MA,MAM,T; long i,j,k; double *p,temp,*p1,**MM,**LM; T=initmat(A.r,A.r); /* first get choleski factor of B */ L=initmat(B.r,B.r); choleski(B,L,0,0); /* L contains choleski factor */ /* Inverting L (it stays lower triangular)*/ M=initmat(B.r,B.c);MM=M.M;LM=L.M; for (i=0;i*(double *)b) return(1); return(0); } void sort(matrix a) /* sorts a vector, in situ, using standard routine qsort */ { int i; qsort(a.V,(size_t)a.r*a.c,sizeof(a.V[0]),elemcmp); for (i=0;ia.V[i+1]) ErrorMessage("Sort failed",1); } void printmat(matrix A,char *fmt) { int i,j; double n; n=matrixnorm(A); for (i=0;i1e-14*n) printf(fmt,A.M[i][j]); else printf(fmt,0.0); } printf("\n"); } /*********************************************************************************/ /* Update Log (started Jan 2000) */ /*********************************************************************************/ /* 1. Numerical Recipes code removed. This meant writing 2 new routines: svd() and invert(), to replace versions that were just modified from Numerical Recipes. An extra routine bidiag() was also written to bidiagonalize a matrix. Routines were heavily tested against old NR routines and perform at least as well. See internal documentation for algorithm description and references. Both written to be as robust as possible. 12/1/2000 2. UTU() and QR() modified to remove danger of under/overflow in householder calculations, and increase efficiency. Tested using random matrices. 13/1/2000 3. Dependence on inverse.h removed, options for SUNC flag removed. 8/1/2000 4. QT() re-written - now much more efficient (consistent with producing output in its old format), no cancellation error, under/overflow protected, address optimized. NOTE: householder() and Hmult() no longer used in matrix.c - check qp.c before removing! 5. updateLS() has had its Givens rotations protected against under/overflow. (not tested). 6. NOTE: choleskir1ud() has not been checked for underflow/overflow and stability. 7. tricholeski() has been checked again with a variety of rank deficient tridiagonal matrices. Theoretical basis for approach has been much strengthened, but I still can not *prove* that the choleski factor will require a zero only on the *final* element of the main diagonal for a properly tri-diagonal matrix. Hence root() now has a check that the "choleski" factors of the tridiagonal matrix are adequate - if not singular value decomposition is used instead. root() also has an improved column removal strategy. 16/1/2000 8. This -> if (wsV[end-1]+wnorm==wnorm) test for convergence of singular values is rather too stringent! Replaced with something more reasonable. 25/2/2000 15/5/2000 - convergence criteria changed back - eigenvalue used for shift was calculated in a manner that allowed bad cancellation errors - this caused convergence problems AND occasional imaginary eigenvalues for the shift. Fixing this fixed the convergence problem. 9. svd() bug fixed - it division by zero was possible while trying to avoid over/ underflow in Givens rotations - check added to fix problem. 15/5/00 */