function [logmax,dL,H]=nolin_afagvdfdg_hess(x,Ma,Mb,y) % NOLIN_AFAGVDFDG_HESS is an optimisation script for the training of % Gaussian process prior models. Particularly, the Gaussian Process is 2 % Gaussian processes with independent explanatory variables. % % [logmax,dL,H] = NOLIN_AFAGVDFDG_HESS(x,Ma,Mb,y) % % This optimisation script uses both Gradient and Hessian information to % perform optimisation for Gaussian process prior models, by adapting % hyperparameters to minimise negative the log-likelihood function, % % logmax = log(det(Q)) + y'*inv(Q)*y % % The prior covariance function used is of the form, % % C(zi,zj) = P1 + P2 + v % % where, % P(zi,zj) = a*exp([zi-zj]'*D*[zi-zj]) % % Inputs: % x = Hyperparameters, [a1 a2 v D1 D2], where D=diag{g1,g2,...,gd} % Ma = Input explanatory variable, of dimension {N0 x D1} % Mb = Input explanatory variable, of dimension {N0 x D2} % y = Outcome (vector) from every input of the explantory variable % % Outputs: % logmax = Negative log-likelihood function value % dL = Gradient vector values % H = Hessian matrix % % This script utilises user-supplied Hessian information, which is capable % of speeding up computation. % % Dependencies:- % - TRACEAXB = calculation of the trace of product of two matrices. % % % (C) Multiple Gaussian Processes Toolbox 1.0 % (C) Copyright 2006, Keith Neo % http://www.hamilton.ie/keith N1=size(Ma,2); N2=size(Mb,2); a1=exp(x(1)); a2=exp(x(2)); v=exp(x(3)); d1=exp(x(4:3+N1)); d2=exp(x(4+N1:3+N1+N2)); expC=constr_cov(Ma,d1); expD=constr_cov(Mb,d2); expA=a1*exp(-0.5*sum(expC,3)); expB=a2*exp(-0.5*sum(expD,3)); Q=diag_add(expA+expB,v); invQ=inv(Q); invQy=invQ*y; [P,k]=chol(Q); if k==0, logdet=2*sum(log(diag(P))); else logdet=sum(log(svd(Q))); end logmax=logdet+y'*invQy; H=zeros(3+N1+N2); dL=zeros(3+N1+N2,1); dL(3)=v*(trace(invQ)-invQy'*invQy); H(3,3)=dL(3)-v*v*(traceaxb(invQ,invQ)-2*(invQy'*invQ*invQy)); Ra=invQ*expA; Rb=invQ*expB; Ray=y'*Ra; Rby=y'*Rb; H(1,2)=2*(Ray*invQ*Rby')-traceaxb(Ra,Rb); H(2,1)=H(1,2); for k=1:N1 Q=expC(:,:,k).*expA; P=invQ*Q; tmp1=P*invQy; dL(3+k)=0.5*(y'*tmp1-trace(P)); H(3+k,3)=v*(0.5*traceaxb(invQ,P)-invQy'*tmp1); H(3,3+k)=H(3+k,3); H(3+k,2)=0.5*traceaxb(Rb,P)-Rby*tmp1; H(2,3+k)=H(3+k,2); H(3+k,1)=dL(3+k)+0.5*traceaxb(Ra,P)-Ray*tmp1; H(1,3+k)=H(3+k,1); for h=k:N1 if h==k d2H=Q-0.5*expC(:,:,k).*Q; H(3+k,3+h)=0.5*(invQy'*d2H*invQy+invQy'*Q*P*invQy-traceaxb(invQ,d2H)-0.5*traceaxb(P,P)); else d2H=expC(:,:,h).*Q; dQz=invQ*(expC(:,:,h).*expA); H(3+k,3+h)=0.25*(traceaxb(invQ,d2H)-traceaxb(P,dQz)-invQy'*d2H*invQy+2*(y'*P*dQz*invQy)); H(3+h,3+k)=H(3+k,3+h); end end for h=1:N2 dQz=invQ*(expD(:,:,h).*expB); H(3+k,3+N1+h)=0.5*(y'*P*dQz*invQy)-0.25*traceaxb(P,dQz); H(3+N1+h,3+k)=H(3+k,3+N1+h); end end clear expC expA tmp1=Ra*invQy; dL(1)=trace(Ra)-y'*tmp1; H(1,1)=dL(1)-traceaxb(Ra,Ra)+2*(Ray*tmp1); H(1,3)=v*(2*(invQy'*tmp1)-traceaxb(invQ,Ra)); H(3,1)=H(1,3); for k=1:N2 Q=expD(:,:,k).*expB; P=invQ*Q; tmp1=P*invQy; dL(3+N1+k)=0.5*(y'*tmp1-trace(P)); H(3+N1+k,3)=v*(0.5*traceaxb(invQ,P)-invQy'*tmp1); H(3,3+N1+k)=H(3+N1+k,3); H(3+N1+k,1)=0.5*traceaxb(Ra,P)-Ray*tmp1; H(1,3+N1+k)=H(3+N1+k,1); H(3+N1+k,2)=dL(3+N1+k)+0.5*traceaxb(Rb,P)-Rby*tmp1; H(2,3+N1+k)=H(3+N1+k,2); for h=k:N2 if h==k d2H=Q-0.5*expD(:,:,k).*Q; H(3+N1+k,3+N1+h)=0.5*(invQy'*d2H*invQy+invQy'*Q*P*invQy-traceaxb(invQ,d2H)-0.5*traceaxb(P,P)); else d2H=expD(:,:,h).*Q; dQz=invQ*(expD(:,:,h).*expB); H(3+N1+k,3+N1+h)=0.25*(traceaxb(invQ,d2H)-traceaxb(P,dQz)-invQy'*d2H*invQy+2*(y'*P*dQz*invQy)); H(3+N1+h,3+N1+k)=H(3+k,3+h); end end end clear expD expB tmp1=Rb*invQy; dL(2)=trace(Rb)-y'*tmp1; H(2,2)=dL(2)-traceaxb(Rb,Rb)+2*(Rby*tmp1); H(2,3)=v*(2*(invQy'*tmp1)-traceaxb(invQ,Rb)); H(3,2)=H(2,3); function Q=diag_add(Q,v) for k=1:size(Q,1) Q(k,k)=Q(k,k)+v; end function expC=constr_cov(M,g) N0=size(M,1); expC=repmat(permute(M,[1,3,2]),[1,N0,1])-repmat(permute(M',[3,2,1]),[N0,1,1]); expC=(expC.^2).*repmat(permute(g',[3 2 1]),[N0,N0,1]);