function [logmax,dL,H]=nolin_fg_lint_hess(x,Ma,Mb,y) % NOLIN_FG_LINT_HESS is an optimisation script for the training of Gaussian % process prior models. Particularly, the Gaussian Process is 2 Gaussian % processes with independent explanatory variables. One of them uses a % Squared Exponential (SE) covariance function, and the other uses a Linear % covariance function. % % [logmax,dL,H] = NOLIN_FG_LINT_HESS(x,Ma,Mb,y) % % This optimisation script uses both Gradient and Hessian information to % perform optimisation for Gaussian process prior models, by adapting % hyperparameters to minimise negative the log-likelihood function, % % logmax = log(det(Q)) + y'*inv(Q)*y % % The prior covariance function used is of the form, % % C(zi,zj) = P1 + P2 + v % % where, % P1(zi,zj) = a*exp([zi-zj]'*D*[zi-zj]) % P2(zi,zj) = wk*Zk*Zk' % % Inputs: % x = Hyperparameters, [a1 a2 v D1 D2], where D=diag{g1,g2,...,gd} % Ma = Input explanatory variable, of dimension {N0 x D1} % Mb = Input explanatory variable, of dimension {N0 x D2} % y = Outcome (vector) from every input of the explantory variable % % Outputs: % logmax = Negative log-likelihood function value % dL = Gradient vector values % H = Hessian matrix % % This script utilises user-supplied Hessian information, which is capable % of speeding up computation. % % External function dependencies:- % - TRACEAXB = calculation of the trace of product of two matrices. % % % (C) Multiple Gaussian Processes Toolbox 1.0 % (C) Copyright 2006, Keith Neo % http://www.hamilton.ie/keith N1=size(Ma,2); N2=size(Mb,2); a=exp(x(1)); v=exp(x(2)); d1=exp(x(3:2+N1)'); d2=exp(x(3+N1:2+N1+N2)'); meanMb=mean(Mb); Mb=Mb-repmat(meanMb,size(Mb,1),1); expD=constr_lin(Mb,d2); expC=constr_cov(Ma,d1); expA=a*exp(-0.5*sum(expC,3)); Q=diag_add(expA+sum(expD,3),v); invQ=inv(Q); invQy=invQ*y; [cholC,pp]=chol(Q); if pp==0, logdet=2*sum(log(diag(cholC))); else logdet=sum(log(svd(Q))); end logmax=y'*invQy+logdet; H=zeros(2+N1+N2); dL=zeros(2+N1+N2,1); Ra=invQ*expA; Ray=y'*Ra; tmp1=Ra*invQy; dL(1)=trace(Ra)-y'*tmp1; H(1,1)=dL(1)-traceaxb(Ra,Ra)+2*(Ray*tmp1); dL(2)=v*(trace(invQ)-invQy'*invQy); H(2,2)=dL(2)-v*v*(traceaxb(invQ,invQ)-2*(invQy'*invQ*invQy)); H(1,2)=v*(2*(invQy'*tmp1)-traceaxb(invQ,Ra)); H(2,1)=H(1,2); for k=1:N1 Q=expC(:,:,k).*expA; P=invQ*Q; tmp1=P*invQy; dL(2+k)=0.5*(y'*tmp1-trace(P)); H(1,2+k)=dL(2+k)+0.5*traceaxb(Ra,P)-Ray*tmp1; H(2+k,1)=H(1,2+k); H(2,2+k)=v*(0.5*traceaxb(invQ,P)-invQy'*tmp1); H(2+k,2)=H(2,2+k); for h=k:N1 if h==k d2H=Q-0.5*expC(:,:,k).*Q; H(2+k,2+h)=0.5*(invQy'*d2H*invQy+invQy'*Q*tmp1-traceaxb(invQ,d2H)-0.5*traceaxb(P,P)); else d2H=expC(:,:,h).*Q; dQz=invQ*(expC(:,:,h).*expA); H(2+k,2+h)=0.25*(traceaxb(invQ,d2H)-traceaxb(P,dQz)-invQy'*d2H*invQy+2*(y'*dQz*tmp1)); H(2+h,2+k)=H(2+k,2+h); end end for h=1:N2 dQz=invQ*expD(:,:,h); H(2+k,2+N1+h)=0.5*traceaxb(P,dQz)-y'*P*dQz*invQy; H(2+N1+h,2+k)=H(2+k,2+N1+h); end end clear expA expC for k=1:N2 P=invQ*expD(:,:,k); tmp1=P*invQy; dL(2+N1+k)=trace(P)-y'*tmp1; H(1,2+N1+k)=2*(Ray*tmp1)-traceaxb(Ra,P); H(2+N1+k,1)=H(1,2+N1+k); H(2,2+N1+k)=v*(2*(invQy'*tmp1)-traceaxb(invQ,P)); H(2+N1+k,2)=H(2,2+N1+k); for h=k:N2 if h==k H(2+N1+k,2+N1+h)=dL(2+N1+k)+2*(y'*P*tmp1)-traceaxb(P,P); else dQz=invQ*expD(:,:,h); H(2+N1+k,2+N1+h)=2*(y'*dQz*tmp1)-traceaxb(dQz,P); H(2+N1+h,2+N1+k)=H(2+N1+k,2+N1+h); end end end function expC=constr_lin(M,w) [N0,D0]=size(M); expC=zeros(N0,N0,D0); for k=1:D0 expC(:,:,k)=w(k)*M(:,k)*M(:,k)'; end function expC=constr_cov(M,g) N0=size(M,1); expC=repmat(permute(M,[1,3,2]),[1,N0,1])-repmat(permute(M',[3,2,1]),[N0,1,1]); expC=(expC.^2).*repmat(permute(g',[3 2 1]),[N0,N0,1]); function Q=diag_add(Q,v) for k=1:size(Q,1) Q(k,k)=Q(k,k)+v; end