function [logmax,dL,H] = nolin_hess(x,M,y) % NOLIN_HESS is an optimisation script for the training of Gaussian % process prior models. % % [LOGMAX,DL,H] = NOLIN_HESS(x,M,y) % % This optimisation script uses both Gradient and Hessian information to % perform optimisation for Gaussian process prior models, by adapting % hyperparameters to minimise negative the log-likelihood function, % % logmax = log(det(Q)) + y'*inv(Q)*y % % The prior covariance function used is of the form, % % C(zi,zj) = a*{exp([zi-zj]'*D*[zi-zj]) + v} % % Inputs: % x = Hyperparameters, [a g1 g2 ... gd v], D=diag{g1,g2,...,gd} % M = Input explanatory variable, of dimension {N0 x D0} % y = Outcome (vector) from every input of the explantory variable % % Outputs: % LOGMAX = Negative log-likelihood function value % DL = Gradient vector values % H = Hessian matrix % % This script utilises user-supplied Hessian information. To avoid using % Hessian information, choose NOLIN_GRAD instead. Note that, Hessian is % useful in speeding up optimisation routines. % % % (C) Gaussian Process Toolbox 1.0 % (C) Copyright 2005-2007, Keith Neo % http://www.hamilton.ie [N0,L0]=size(M); a=exp(x(1)); g=exp(x(2:L0+1)); V=exp(x(2+L0)); expC=repmat(permute(M,[1,3,2]),[1,N0,1])-repmat(permute(M',[3,2,1]),[N0,1,1]); expC=(expC.^2).*repmat(permute(g,[3 2 1]),[N0,N0,1]); expA=a*exp(-0.5*sum(expC,3)); Q=diag_add(expA,a*V,N0); invQ=inv(Q); invQy=invQ*y; yQy=y'*invQy; [cholC,pp]=chol(Q); if pp==0, logdet=2*sum(log(diag(cholC))); else logdet=sum(log(svd(Q))); end logmax=logdet+yQy; clear cholC; yQQy=invQy'*invQy; dL=zeros(L0+2,1); dL(1,1)=N0-yQy; dL(L0+2,1)=a*V*(trace(invQ)-yQQy); H=zeros(L0+2); H(1,1)=yQy; H(1,L0+2)=a*V*yQQy; H(L0+2,1)=H(1,L0+2); H(L0+2,L0+2)=dL(L0+2,1)-(a*V)^2*(traceAxxB(invQ,invQ)-2*invQy'*invQ*invQy); for k=1:L0 Q=-0.5*expC(:,:,k).*expA; ydQgy=invQy'*Q*invQy; dQinvQ=Q*invQ; dL(1+k,1)=trace(dQinvQ)-ydQgy; H(1,1+k)=ydQgy; H(1+k,1)=H(1,1+k); H(1+k,L0+2)=a*V*(2*invQy'*dQinvQ*invQy-traceAxxB(invQ,dQinvQ)); H(L0+2,1+k)=H(1+k,L0+2); for p=k:L0 if p==k d2H=Q-0.5*expC(:,:,k).*Q; H(1+k,1+p)=traceAxxB(invQ,d2H)-traceAxxB(dQinvQ,dQinvQ)-invQy'*d2H*invQy+2*invQy'*dQinvQ*dQinvQ*y; else d2H=-0.5*expC(:,:,p).*Q; dQinvQp=(-0.5*expC(:,:,p).*expA)*invQ; H(1+k,1+p)=traceAxxB(invQ,d2H)-traceAxxB(dQinvQp,dQinvQ)-invQy'*d2H*invQy+2*invQy'*dQinvQp*dQinvQ*y; end H(1+p,1+k)=H(1+k,1+p); end end function Q=diag_add(Q,v,N0) for pp=1:N0 Q(pp,pp)=Q(pp,pp)+v; end function trace_AB=traceAxxB(A,B) trace_AB=0; dimAB=size(A,1); for i=1:dimAB, trace_AB=trace_AB+A(i,:)*B(:,i); end;