From 3b8a075fe8efb4785f49e4e373968a5e372feb8f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 13:05:46 +0300
Subject: [PATCH 01/60] initial commit of Laplace approximation & EP

---
 LICENSE               |   2 +-
 src/ApproximateGPs.jl |   8 +++
 src/ep.jl             | 121 +++++++++++++++++++++++++++++++++
 src/laplace.jl        | 153 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 src/ep.jl
 create mode 100644 src/laplace.jl

diff --git a/LICENSE b/LICENSE
index aa3e25a2..dde3dc88 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 Copyright (c) 2021
 
-Ross Viljoen
+The JuliaGaussianProcess Contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 4f255765..e689c623 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -22,4 +22,12 @@ include("svgp.jl")
 include("expected_loglik.jl")
 include("elbo.jl")
 
+using Zygote
+using QuadGK
+using Optim
+
+export laplace_steps, laplace_posterior, optimize_elbo
+include("laplace.jl")
+include("ep.jl")
+
 end
diff --git a/src/ep.jl b/src/ep.jl
new file mode 100644
index 00000000..f29d9551
--- /dev/null
+++ b/src/ep.jl
@@ -0,0 +1,121 @@
+export ith_marginal, mul_dist, div_dist, moment_match, ep_approx_posterior, epsite_pdf
+
+function ith_marginal(d::Union{MvNormal,MvNormalCanon}, i::Int)
+    m = mean(d)
+    v = var(d)
+    return Normal(m[i], sqrt(v[i]))
+end
+function mul_dist(a::NormalCanon, b::NormalCanon)
+    # NormalCanon
+    #     η::T       # σ^(-2) * μ
+    #     λ::T       # σ^(-2)
+    etaAmulB = a.η + b.η
+    lambdaAmulB = a.λ + b.λ
+    return NormalCanon(etaAmulB, lambdaAmulB)
+end
+
+mul_dist(a, b) = mul_dist(convert(NormalCanon, a), convert(NormalCanon, b))
+
+function mul_dist(a::MvNormalCanon, b::MvNormalCanon)
+    # MvNormalCanon
+    #    h::V    # potential vector, i.e. inv(Σ) * μ
+    #    J::P    # precision matrix, i.e. inv(Σ)
+    hAmulB = a.h + b.h
+    JAmulB = a.J + b.J
+    return MvNormalCanon(hAmulB, JAmulB)
+end
+
+mul_dist(a::MvNormal, b) = mul_dist(canonform(a), b)
+
+function div_dist(a::NormalCanon, b::NormalCanon)
+    # NormalCanon
+    #     η::T       # σ^(-2) * μ
+    #     λ::T       # σ^(-2)
+    etaAdivB = a.η - b.η
+    lambdaAdivB = a.λ - b.λ
+    return NormalCanon(etaAdivB, lambdaAdivB)
+end
+
+div_dist(a::Normal, b) = div_dist(convert(NormalCanon, a), b)
+div_dist(a, b::Normal) = div_dist(a, convert(NormalCanon, b))
+
+#function EPSite(Z, m, s2)
+#    return (; Z, m, s2)
+#end
+#
+#function epsite_dist(site)
+#    return Normal(site.m, sqrt(site.s2))
+#end
+
+epsite_dist(site) = site.q
+
+function epsite_pdf(site, f)
+    return site.Z * pdf(epsite_dist(site), f)
+end
+
+function moment_match(cav_i::UnivariateDistribution, lik_eval_i)
+    lower = mean(cav_i) - 20 * std(cav_i)
+    upper = mean(cav_i) + 20 * std(cav_i)
+    m0, _ = quadgk(f -> pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+    m1, _ = quadgk(f -> f * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+    m2, _ = quadgk(f -> f^2 * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+    matched_Z = m0
+    matched_mean = m1 / m0
+    matched_var = m2 / m0 - matched_mean^2
+    return (; Z=matched_Z, q=Normal(matched_mean, sqrt(matched_var)))
+end
+
+function ep_approx_posterior(prior, sites::AbstractVector)
+    canon_site_dists = [convert(NormalCanon, t.q) for t in sites]
+    potentials = [q.η for q in canon_site_dists]
+    precisions = [q.λ for q in canon_site_dists]
+    ts_dist = MvNormalCanon(potentials, precisions)
+    return mul_dist(prior, ts_dist)
+end
+
+function EPProblem(p::MvNormal, lik_evals::AbstractVector)
+    return (; p, lik_evals)
+end
+
+function EPState(q::MvNormal, sites::AbstractVector)
+    return (; q, sites)
+end
+
+function ep_single_site_update(ep_problem, ep_state, i::Int)
+    q_fi = ith_marginal(ep_state.q, i)
+    alik_i = epsite_dist(ep_state.sites[i].q)
+    cav_i = div_dist(q_fi, alik_i)
+    qhat_i = moment_match(cav_i, ep_problem.lik_evals[i])
+    new_t = div_dist(qhat_i.q, cav_i)
+    #delta_eta = 
+    #new_q = rank_one_update(ep_state.q, 
+
+    #new_q = 
+    #return EPState(new_q, new_sites)
+end
+
+function ep_step!(ep_tuple)
+    return "work in progress"
+end
+
+function EPResult(results)
+    return (; results)
+end
+
+function ep_steps(dist_y_given_f, f_prior, y; maxiter = 100)
+    f = mean(f_prior)
+    @assert f == zero(f)  # might work with non-zero prior mean but not checked
+    converged = false
+    res_array = []
+    for i = 1:maxiter
+        results = ep_step!(f, dist_y_given_f, f_prior, y)
+        push!(res_array, EPResult(results))
+        if isapprox(f, results.fnew)
+            break  # converged
+        else
+            f = results.fnew
+        end
+    end
+    return res_array
+end
+
diff --git a/src/laplace.jl b/src/laplace.jl
new file mode 100644
index 00000000..d47ba417
--- /dev/null
+++ b/src/laplace.jl
@@ -0,0 +1,153 @@
+function _laplace_train_intermediates(overall_loglik, K, f)
+    loglik = overall_loglik(f)  # TODO use withgradient instead
+    d_loglik, = gradient(overall_loglik, f)
+    d2_loglik, = diaghessian(overall_loglik, f)
+
+    W = -Diagonal(d2_loglik)
+    Wsqrt = sqrt(W)
+    B = I + Wsqrt * K * Wsqrt
+    B_ch = cholesky(Symmetric(B))
+    b = W * f + d_loglik
+    a = b - Wsqrt * (B_ch \ (Wsqrt * K * b))
+    
+    return (; W, Wsqrt, K, a, loglik, d_loglik, B_ch)
+end
+
+function _newton_step(overall_loglik, K, f)
+    cache = _laplace_train_intermediates(overall_loglik, K, f)
+    fnew = K * cache.a
+    return fnew, cache
+end
+
+function laplace_lml(f, c)
+    return -c.a' * f / 2 + c.loglik - sum(log.(diag(c.B_ch.L)))
+end
+
+function laplace_f_cov(cache)
+    # (K⁻¹ + W)⁻¹
+    # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
+    # = (√W⁻¹) (√W⁻¹ K⁻¹ √W⁻¹ + I)⁻¹ (√W⁻¹)
+    # ; (I + C⁻¹)⁻¹ = I - (I + C)⁻¹
+    # = (√W⁻¹) (I - (I + √W K √W)⁻¹) (√W⁻¹)
+    # = (√W⁻¹) (I - B⁻¹) (√W⁻¹)
+    B_ch = cache.B_ch
+    Wsqrt_inv = inv(cache.Wsqrt)
+    return Wsqrt_inv * (I - inv(B_ch)) * Wsqrt_inv
+end
+
+function LaplaceResult(f, fnew, cache)
+    # TODO should we use fnew?
+    f_cov = laplace_f_cov(cache)
+    q = MvNormal(f, AbstractGPs._symmetric(f_cov))
+    lml_approx = laplace_lml(f, cache)
+
+    return (; f, f_cov, q, lml_approx, cache)
+end
+
+function laplace_steps(dist_y_given_f, f_prior, ys; maxiter = 100, f = mean(f_prior))
+    @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
+    @assert length(ys) == length(f_prior) == length(f)
+
+    overall_loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, ys))
+    K = cov(f_prior)
+
+    res_array = []
+    for i = 1:maxiter
+        @info "iteration $i"
+        fnew, cache = _newton_step(overall_loglik, K, f)
+
+        push!(res_array, LaplaceResult(f, fnew, cache))
+        # TODO don't do all these computations
+
+        if isapprox(f, fnew)
+            break  # converged
+        else
+            f = fnew
+        end
+    end
+
+    return res_array
+end
+
+function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y)
+    newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
+    f_post = LaplacePosteriorGP(lfX.fx, newt_res[end])
+    return f_post
+end
+
+function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
+    lf = build_latent_gp(theta0)
+    lfX = lf(X)
+    f = mean(lfX.fx)
+
+    function objective(theta)
+        # @info "Hyperparameters: $theta" # TODO does not work with Zygote
+        lf = build_latent_gp(theta)
+        lfX = lf(X)
+
+        f_opt = Zygote.ignore() do
+            newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
+            f_opt = newt_res[end].f
+            f .= f_opt
+            return f_opt
+        end
+
+	# TODO ideally I wouldn't copy&paste the following lines
+        overall_loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, Y))
+        K = cov(lfX.fx)
+
+	# but we have to re-compute this outside the Zygote.ignore() to compute gradients
+        cache = _laplace_train_intermediates(overall_loglik, K, f_opt)
+        return -laplace_lml(f_opt, cache)
+    end
+
+    training_results = Optim.optimize(
+        objective, θ -> only(Zygote.gradient(objective, θ)), theta0, optimizer, optim_options;
+        inplace=false,
+    )
+    
+    lf = build_latent_gp(training_results.minimizer)
+    lfX = lf(X)
+
+    newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
+    f_post = LaplacePosteriorGP(lfX.fx, newt_res[end])
+    return f_post
+end
+
+struct LaplacePosteriorGP{Tprior,Tdata} <: AbstractGPs.AbstractGP
+    prior::Tprior
+    data::Tdata
+end
+
+function _laplace_predict_intermediates(cache, prior_at_x, xnew)
+    k_x_xnew = cov(prior_at_x.f, prior_at_x.x, xnew)
+    f_mean = mean(prior_at_x.f, xnew) + k_x_xnew' * cache.d_loglik
+    L = cache.B_ch.L
+    v = L \ (cache.Wsqrt * k_x_xnew)
+    return f_mean, v
+end
+
+function StatsBase.mean_and_var(f::LaplacePosteriorGP, x::AbstractVector)
+    f_mean, v = _laplace_predict_intermediates(f.data.cache, f.prior, x)
+    f_var = var(f.prior.f, x) - vec(sum(v .^ 2, dims = 1))
+    return f_mean, f_var
+end
+
+function StatsBase.mean_and_cov(f::LaplacePosteriorGP, x::AbstractVector)
+    f_mean, v = _laplace_predict_intermediates(f.data.cache, f.prior, x)
+    f_cov = cov(f.prior.f, x) - v' * v
+    return f_mean, f_cov
+end
+
+function Statistics.mean(f::LaplacePosteriorGP, x::AbstractVector)
+    d_loglik = f.data.cache.d_loglik
+    return mean(f.prior.f, x) + cov(f.prior.f, f.prior.x, x)' * d_loglik
+end
+
+function Statistics.cov(f::LaplacePosteriorGP, x::AbstractVector)
+    return last(mean_and_cov(f, x))
+end
+
+function Statistics.var(f::LaplacePosteriorGP, x::AbstractVector)
+    return last(mean_and_var(f, x))
+end

From c73e9ce45356b1bab22b722aee6a48763e060044 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 13:18:32 +0300
Subject: [PATCH 02/60] Laplace demo

---
 demo_laplace.jl | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 demo_laplace.jl

diff --git a/demo_laplace.jl b/demo_laplace.jl
new file mode 100644
index 00000000..f809d50e
--- /dev/null
+++ b/demo_laplace.jl
@@ -0,0 +1,39 @@
+using Random
+using LogExpFunctions
+using Distributions
+using Plots
+using Optim
+using AbstractGPs
+using AbstractGPs: LatentFiniteGP
+using NonConjugateGPViz
+
+Random.seed!(1)
+X = range(0, 23.5; length=48)
+fs = @. 3*sin(10 + 0.6X) + sin(0.1X) - 1
+# invlink = normcdf
+invlink = logistic
+ps = invlink.(fs)
+Y = [rand(Bernoulli(p)) for p in ps]
+
+function plot_data()
+    plot()
+    plot!(X, ps)
+    scatter!(X, Y)
+end
+
+dist_y_given_f(f) = Bernoulli(invlink(f))
+
+theta0 = [0.0, 1.0]
+
+optimizer = LBFGS(;
+    alphaguess=Optim.LineSearches.InitialStatic(; scaled=true),
+    linesearch=Optim.LineSearches.BackTracking(),
+)
+optim_options = Optim.Options(; iterations=100)
+
+function build_latent_gp(theta)
+    variance = softplus(theta[1])
+    lengthscale = softplus(theta[2])
+    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
+    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
+end

From 93ec7952bf8ab8fa1608407e536582b8301d2fa2 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 13:44:58 +0300
Subject: [PATCH 03/60] bugfix

---
 src/laplace.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index d47ba417..803bbe88 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -93,7 +93,7 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
         end
 
 	# TODO ideally I wouldn't copy&paste the following lines
-        overall_loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, Y))
+        overall_loglik(fs) = sum(logpdf(lfX.lik(f), y) for (f, y) in zip(fs, Y))
         K = cov(lfX.fx)
 
 	# but we have to re-compute this outside the Zygote.ignore() to compute gradients

From d62b2dc53276ff6a4a50a8418915c2794f12889f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 14:00:34 +0300
Subject: [PATCH 04/60] bugfix demo

---
 demo_laplace.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo_laplace.jl b/demo_laplace.jl
index f809d50e..19af1d3f 100644
--- a/demo_laplace.jl
+++ b/demo_laplace.jl
@@ -5,7 +5,7 @@ using Plots
 using Optim
 using AbstractGPs
 using AbstractGPs: LatentFiniteGP
-using NonConjugateGPViz
+using ApproximateGPs
 
 Random.seed!(1)
 X = range(0, 23.5; length=48)

From 76afc5533d1dacf7f837594ba0ad7e25c47b5c00 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 15:45:13 +0300
Subject: [PATCH 05/60] WIP: cleanup; pass around dist_y_given_f explicitly

---
 Project.toml   |   3 ++
 src/laplace.jl | 123 +++++++++++++++++++++++++++++++------------------
 2 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3a9842ab..3aee3f39 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,10 +12,13 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 KLDivergences = "3c9cd921-3d3f-41e2-830c-e020174918cc"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 AbstractGPs = "0.3, 0.4, 0.5"
diff --git a/src/laplace.jl b/src/laplace.jl
index 803bbe88..82cb1c0e 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -1,25 +1,43 @@
-function _laplace_train_intermediates(overall_loglik, K, f)
-    loglik = overall_loglik(f)  # TODO use withgradient instead
-    d_loglik, = gradient(overall_loglik, f)
-    d2_loglik, = diaghessian(overall_loglik, f)
-
-    W = -Diagonal(d2_loglik)
+function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
+    # Ψ = log p(y|f) + log p(f)
+    #   = loglik + log p(f)
+    # dΨ/df = d_loglik - K⁻¹ f
+    # at fhat: d_loglik = K⁻¹ f
+	
+    # d²Ψ/df² = d2_loglik - K⁻¹
+    #         = -W - K⁻¹
+
+    ll, d_ll, d2_ll = loglik_and_derivs(dist_y_given_f, ys, f)
+
+    W = -Diagonal(d2_ll)
     Wsqrt = sqrt(W)
     B = I + Wsqrt * K * Wsqrt
     B_ch = cholesky(Symmetric(B))
-    b = W * f + d_loglik
+    b = W * f + d_ll
     a = b - Wsqrt * (B_ch \ (Wsqrt * K * b))
     
-    return (; W, Wsqrt, K, a, loglik, d_loglik, B_ch)
+    return (; W, Wsqrt, K, a, loglik=ll, d_loglik=d_ll, B_ch)
 end
 
-function _newton_step(overall_loglik, K, f)
-    cache = _laplace_train_intermediates(overall_loglik, K, f)
+# dist_y_given_f(f) = Bernoulli(logistic(f))
+
+function loglik_and_derivs(dist_y_given_f, ys::AbstractVector, f::AbstractVector{<:Real})
+    loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, ys))
+    ll = loglik(f)  # TODO use withgradient instead
+    d_ll, = gradient(loglik, f)
+    d2_ll, = diaghessian(loglik, f)
+    return ll, d_ll, d2_ll
+end
+#lik = f -> logpdf(Distribution(f), y)
+#dlik = ForwardDiff.derivative(lik)
+
+function _newton_step(dist_y_given_f, ys, K, f)
+    cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     fnew = K * cache.a
     return fnew, cache
 end
 
-function laplace_lml(f, c)
+function _laplace_lml(f, c)
     return -c.a' * f / 2 + c.loglik - sum(log.(diag(c.B_ch.L)))
 end
 
@@ -39,7 +57,7 @@ function LaplaceResult(f, fnew, cache)
     # TODO should we use fnew?
     f_cov = laplace_f_cov(cache)
     q = MvNormal(f, AbstractGPs._symmetric(f_cov))
-    lml_approx = laplace_lml(f, cache)
+    lml_approx = _laplace_lml(f, cache)
 
     return (; f, f_cov, q, lml_approx, cache)
 end
@@ -48,18 +66,18 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter = 100, f = mean(f_pr
     @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
     @assert length(ys) == length(f_prior) == length(f)
 
-    overall_loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, ys))
     K = cov(f_prior)
 
     res_array = []
     for i = 1:maxiter
-        @info "iteration $i"
-        fnew, cache = _newton_step(overall_loglik, K, f)
+        @info "  - Newton iteration $i"
+        fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
 
         push!(res_array, LaplaceResult(f, fnew, cache))
-        # TODO don't do all these computations
+        # TODO don't do all these computations unless we actually want them
 
         if isapprox(f, fnew)
+	    @info "  + converged"
             break  # converged
         else
             f = fnew
@@ -69,53 +87,70 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter = 100, f = mean(f_pr
     return res_array
 end
 
-function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y)
-    newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
+function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y; kwargs...)
+    newt_res = laplace_steps(lfX.lik, lfX.fx, Y; kwargs...)
     f_post = LaplacePosteriorGP(lfX.fx, newt_res[end])
     return f_post
 end
 
+"""
+laplace_lml(f::Vector, lfX::LatentFiniteGP, Y::Vector)
+
+`f` 
+"""
+function laplace_lml!(f, lfX, Y)
+    f_opt = Zygote.ignore() do
+        newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
+        f_opt = newt_res[end].f
+        f .= f_opt
+        return f_opt
+    end
+
+    # TODO ideally I wouldn't copy&paste the following lines
+    # but we have to re-compute this outside the Zygote.ignore() to compute gradients
+    cache = _laplace_train_intermediates(lfX.lik, Y, cov(lfX.fx), f_opt)
+    return _laplace_lml(f_opt, cache)
+end
+
+#function rrule(::laplace_lml!, ...)
+#
+#end
+
+function laplace_lml(lfX, Y)
+    f = mean(lfX.fx)
+    return laplace_lml!(f, lfX, Y)
+end
+
 function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
     lf = build_latent_gp(theta0)
-    lfX = lf(X)
-    f = mean(lfX.fx)
+    f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
 
     function objective(theta)
-        # @info "Hyperparameters: $theta" # TODO does not work with Zygote
+        Zygote.ignore() do
+	    # Zygote does not like the try/catch within @info
+	    @info "Hyperparameters: $theta" 
+	end
         lf = build_latent_gp(theta)
-        lfX = lf(X)
-
-        f_opt = Zygote.ignore() do
-            newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
-            f_opt = newt_res[end].f
-            f .= f_opt
-            return f_opt
-        end
-
-	# TODO ideally I wouldn't copy&paste the following lines
-        overall_loglik(fs) = sum(logpdf(lfX.lik(f), y) for (f, y) in zip(fs, Y))
-        K = cov(lfX.fx)
-
-	# but we have to re-compute this outside the Zygote.ignore() to compute gradients
-        cache = _laplace_train_intermediates(overall_loglik, K, f_opt)
-        return -laplace_lml(f_opt, cache)
+	lml = laplace_lml!(f, lf(X), Y)
+        return -lml
     end
 
+    #training_results = Optim.optimize(
+    #    objective, θ -> only(Zygote.gradient(objective, θ)), theta0, optimizer, optim_options;
+    #    inplace=false,
+    #)
     training_results = Optim.optimize(
-        objective, θ -> only(Zygote.gradient(objective, θ)), theta0, optimizer, optim_options;
+        objective, theta0, optimizer, optim_options;
         inplace=false,
     )
     
     lf = build_latent_gp(training_results.minimizer)
-    lfX = lf(X)
-
-    newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
-    f_post = LaplacePosteriorGP(lfX.fx, newt_res[end])
-    return f_post
+    f_post = laplace_posterior(lf(X), Y; f)
+    return f_post, training_results
 end
 
 struct LaplacePosteriorGP{Tprior,Tdata} <: AbstractGPs.AbstractGP
-    prior::Tprior
+    prior::Tprior  # this is lfx.fx; should we store lfx itself (including lik) instead?
     data::Tdata
 end
 

From 46009e45fc6129315207d1dd4aaed81fc1873605 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 15:53:21 +0300
Subject: [PATCH 06/60] use ForwardDiff elementwise (much faster)

---
 Project.toml          |  1 +
 src/ApproximateGPs.jl |  1 +
 src/laplace.jl        | 17 +++++++++++------
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3aee3f39..5f2cb9ff 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 KLDivergences = "3c9cd921-3d3f-41e2-830c-e020174918cc"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index e689c623..fce4db97 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -23,6 +23,7 @@ include("expected_loglik.jl")
 include("elbo.jl")
 
 using Zygote
+using ForwardDiff
 using QuadGK
 using Optim
 
diff --git a/src/laplace.jl b/src/laplace.jl
index 82cb1c0e..2c86ff63 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -22,14 +22,19 @@ end
 # dist_y_given_f(f) = Bernoulli(logistic(f))
 
 function loglik_and_derivs(dist_y_given_f, ys::AbstractVector, f::AbstractVector{<:Real})
-    loglik(fs) = sum(logpdf(dist_y_given_f(f), y) for (f, y) in zip(fs, ys))
-    ll = loglik(f)  # TODO use withgradient instead
-    d_ll, = gradient(loglik, f)
-    d2_ll, = diaghessian(loglik, f)
+    function per_observation(fhat, y)
+        ll(f) = logpdf(dist_y_given_f(f), y)
+	d_ll(f) = ForwardDiff.derivative(ll, f)
+	d2_ll(f) = ForwardDiff.derivative(d_ll, f)
+	return ll(fhat), d_ll(fhat), d2_ll(fhat)
+    end
+    vec_of_l_d_d2 = map(per_observation, f, ys)
+    ls = map(res -> res[1], vec_of_l_d_d2)
+    ll = sum(ls)
+    d_ll = map(res -> res[2], vec_of_l_d_d2)
+    d2_ll = map(res -> res[3], vec_of_l_d_d2)
     return ll, d_ll, d2_ll
 end
-#lik = f -> logpdf(Distribution(f), y)
-#dlik = ForwardDiff.derivative(lik)
 
 function _newton_step(dist_y_given_f, ys, K, f)
     cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f)

From 0c1a3296c8120979c652cb5fcc40a80e31eb0c56 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 17:06:45 +0300
Subject: [PATCH 07/60] format

---
 src/laplace.jl | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 2c86ff63..1a1b2055 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -3,7 +3,7 @@ function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     #   = loglik + log p(f)
     # dΨ/df = d_loglik - K⁻¹ f
     # at fhat: d_loglik = K⁻¹ f
-	
+
     # d²Ψ/df² = d2_loglik - K⁻¹
     #         = -W - K⁻¹
 
@@ -15,7 +15,7 @@ function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     B_ch = cholesky(Symmetric(B))
     b = W * f + d_ll
     a = b - Wsqrt * (B_ch \ (Wsqrt * K * b))
-    
+
     return (; W, Wsqrt, K, a, loglik=ll, d_loglik=d_ll, B_ch)
 end
 
@@ -24,9 +24,9 @@ end
 function loglik_and_derivs(dist_y_given_f, ys::AbstractVector, f::AbstractVector{<:Real})
     function per_observation(fhat, y)
         ll(f) = logpdf(dist_y_given_f(f), y)
-	d_ll(f) = ForwardDiff.derivative(ll, f)
-	d2_ll(f) = ForwardDiff.derivative(d_ll, f)
-	return ll(fhat), d_ll(fhat), d2_ll(fhat)
+        d_ll(f) = ForwardDiff.derivative(ll, f)
+        d2_ll(f) = ForwardDiff.derivative(d_ll, f)
+        return ll(fhat), d_ll(fhat), d2_ll(fhat)
     end
     vec_of_l_d_d2 = map(per_observation, f, ys)
     ls = map(res -> res[1], vec_of_l_d_d2)
@@ -67,14 +67,14 @@ function LaplaceResult(f, fnew, cache)
     return (; f, f_cov, q, lml_approx, cache)
 end
 
-function laplace_steps(dist_y_given_f, f_prior, ys; maxiter = 100, f = mean(f_prior))
+function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior))
     @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
     @assert length(ys) == length(f_prior) == length(f)
 
     K = cov(f_prior)
 
     res_array = []
-    for i = 1:maxiter
+    for i in 1:maxiter
         @info "  - Newton iteration $i"
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
 
@@ -82,7 +82,7 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter = 100, f = mean(f_pr
         # TODO don't do all these computations unless we actually want them
 
         if isapprox(f, fnew)
-	    @info "  + converged"
+            @info "  + converged"
             break  # converged
         else
             f = fnew
@@ -132,11 +132,11 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
 
     function objective(theta)
         Zygote.ignore() do
-	    # Zygote does not like the try/catch within @info
-	    @info "Hyperparameters: $theta" 
-	end
+            # Zygote does not like the try/catch within @info
+            @info "Hyperparameters: $theta"
+        end
         lf = build_latent_gp(theta)
-	lml = laplace_lml!(f, lf(X), Y)
+        lml = laplace_lml!(f, lf(X), Y)
         return -lml
     end
 
@@ -145,10 +145,9 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
     #    inplace=false,
     #)
     training_results = Optim.optimize(
-        objective, theta0, optimizer, optim_options;
-        inplace=false,
+        objective, theta0, optimizer, optim_options; inplace=false
     )
-    
+
     lf = build_latent_gp(training_results.minimizer)
     f_post = laplace_posterior(lf(X), Y; f)
     return f_post, training_results
@@ -169,7 +168,7 @@ end
 
 function StatsBase.mean_and_var(f::LaplacePosteriorGP, x::AbstractVector)
     f_mean, v = _laplace_predict_intermediates(f.data.cache, f.prior, x)
-    f_var = var(f.prior.f, x) - vec(sum(v .^ 2, dims = 1))
+    f_var = var(f.prior.f, x) - vec(sum(v .^ 2; dims=1))
     return f_mean, f_var
 end
 

From 88410cc691a17032a67911c19c7096920bdec165 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 17 Sep 2021 17:09:21 +0300
Subject: [PATCH 08/60] WIP: gradient

---
 src/laplace.jl | 94 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 10 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 1a1b2055..05ca8c9b 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -46,6 +46,8 @@ function _laplace_lml(f, c)
     return -c.a' * f / 2 + c.loglik - sum(log.(diag(c.B_ch.L)))
 end
 
+#function _laplace_lml_deriv()
+
 function laplace_f_cov(cache)
     # (K⁻¹ + W)⁻¹
     # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
@@ -117,15 +119,82 @@ function laplace_lml!(f, lfX, Y)
     return _laplace_lml(f_opt, cache)
 end
 
-#function rrule(::laplace_lml!, ...)
-#
-#end
+function laplace_lml_nonewton(f_opt, lfX, Y)
+    cache = _laplace_train_intermediates(lfX.lik, Y, cov(lfX.fx), f_opt)
+    return _laplace_lml(f_opt, cache)
+    # d lml / d theta = \partial lml / \partial theta + \partial lml / \partial fhat \partial fhat / \partial theta
+end
+
+function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    f = f_init
+    for i in 1:maxiter
+        @info "  - Newton iteration $i"
+        fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
 
-function laplace_lml(lfX, Y)
-    f = mean(lfX.fx)
-    return laplace_lml!(f, lfX, Y)
+        if isapprox(f, fnew)
+            @info "  + converged"
+            break  # converged
+        else
+            f = fnew
+        end
+    end
+    return fnew #, cache
 end
 
+function ChainRulesCore.rrule(
+    config::RuleConfig{>:HasReverseMode},
+    ::typeof(newton_inner_loop),
+    K,
+    dist_y_given_f,
+    Y;
+    f_init,
+    maxiter,
+)
+    fopt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
+    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, fopt)
+
+    function newton_pullback(Δfopt)
+        δself = NoTangent()
+        function newton_single_step(K, dist_y_given_f, ys)
+            fnew, cache = _newton_step(dist_y_given_f, ys, K, fopt)
+            return fnew
+        end
+
+        fopt2, d_fopt = rrule_via_ad(config, newton_single_step, K, dist_y_given_f, Y)
+        return d_fopt(Δfopt)
+        dfopt_dK_times_Δfopt = d_fopt(Δfopt)[2]
+
+        dfopt_dlik_times_Δfopt = @not_implemented(
+            "gradient of lml w.r.t. likelihood parameters"
+        )
+        dfopt_dY_times_Δfopt = @not_implemented("gradient of lml w.r.t. observations")
+        return (δself, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
+    end
+
+    return (fnew, cache), newton_pullback
+end
+
+function laplace_lml(K, dist_y_given_f, Y; f=zeros(length(Y)), maxiter)
+    f_opt, cache = newton_inner_loop(dist_y_given_f, Y, K; f, maxiter)
+    return _laplace_lml(f_opt, cache)
+end
+
+#function ChainRulesCore.rrule(::typeof(laplace_lml), K, dist_y_given_f, Y)
+#    newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
+#    f_opt = newt_res[end].f
+#    cache = newt_res[end].cache
+#    lml = _laplace_lml(f_opt, cache)
+#
+#    function laplace_lml_pullback(Δlml)
+#        ..._d(lfX.fx.f)
+#	[..._d("lfx.lik")]
+#        dlml_dlfX = ?
+#        return NoTangent(), dlml_dlfX * Δlml, @not_implemented("gradient of lml @not_implemented("gradient of lml w.r.t. observations")
+#    end
+#
+#    return lml, laplace_lml_pullback
+#end
+
 function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
     lf = build_latent_gp(theta0)
     f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
@@ -140,13 +209,18 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
         return -lml
     end
 
+    training_results = Optim.optimize(
+        objective,
+        θ -> only(Zygote.gradient(objective, θ)),
+        theta0,
+        optimizer,
+        optim_options;
+        inplace=false,
+    )
     #training_results = Optim.optimize(
-    #    objective, θ -> only(Zygote.gradient(objective, θ)), theta0, optimizer, optim_options;
+    #    objective, theta0, optimizer, optim_options;
     #    inplace=false,
     #)
-    training_results = Optim.optimize(
-        objective, theta0, optimizer, optim_options; inplace=false
-    )
 
     lf = build_latent_gp(training_results.minimizer)
     f_post = laplace_posterior(lf(X), Y; f)

From 4853b5fb087ed38d27d2f5b531e4cb8c171dfd4f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Mon, 20 Sep 2021 20:42:37 +0300
Subject: [PATCH 09/60] *Very* WIP - but works

---
 demo_laplace.jl |  54 ++++++++++++++--
 src/laplace.jl  | 160 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 178 insertions(+), 36 deletions(-)

diff --git a/demo_laplace.jl b/demo_laplace.jl
index 19af1d3f..9959b664 100644
--- a/demo_laplace.jl
+++ b/demo_laplace.jl
@@ -23,6 +23,20 @@ end
 
 dist_y_given_f(f) = Bernoulli(invlink(f))
 
+function build_latent_gp(theta)
+    variance = softplus(theta[1])
+    lengthscale = softplus(theta[2])
+    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
+    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
+end
+
+function plot_samples!(Xgrid, fpost; samples=100, color=2)
+    fsamples = rand(fpost(Xgrid, 1e-8), samples)
+    plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
+end
+
+using Zygote
+
 theta0 = [0.0, 1.0]
 
 optimizer = LBFGS(;
@@ -31,9 +45,39 @@ optimizer = LBFGS(;
 )
 optim_options = Optim.Options(; iterations=100)
 
-function build_latent_gp(theta)
-    variance = softplus(theta[1])
-    lengthscale = softplus(theta[2])
-    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
-    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
+lf = build_latent_gp(theta0)
+lfX = lf(X)
+
+newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
+
+f_post, opt_res = ApproximateGPs.optimize_elbo(build_latent_gp, theta0, X, Y, NelderMead(), optim_options)
+
+theta1 = opt_res.minimizer
+
+function full_objective(theta)
+    Zygote.ignore() do
+        # Zygote does not like the try/catch within @info
+        @info "Hyperparameters: $theta"
+    end
+    lf = build_latent_gp(theta)
+    f = zeros(length(X))
+    lml = ApproximateGPs.laplace_lml!(f, lf(X), Y)
+    return -lml
+end
+
+function nonewton_objective(theta)
+    _lf = build_latent_gp(theta)
+    return -ApproximateGPs.laplace_lml_nonewton(newt_res[end].f, _lf(X), Y)
+end
+
+using FiniteDifferences
+
+FiniteDifferences.grad(central_fdm(5, 1), full_objective, theta0)
+
+
+function comp_lml(theta)
+    _lf = build_latent_gp(theta)
+    K = kernelmatrix(_lf.f.kernel, X)
+    dist_y_given_f = _lf.lik
+    return ApproximateGPs.laplace_lml(K, dist_y_given_f, Y; maxiter=100)
 end
diff --git a/src/laplace.jl b/src/laplace.jl
index 05ca8c9b..e569d5c5 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -125,57 +125,144 @@ function laplace_lml_nonewton(f_opt, lfX, Y)
     # d lml / d theta = \partial lml / \partial theta + \partial lml / \partial fhat \partial fhat / \partial theta
 end
 
-function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+NEWTON_COUNTER = 0
+
+function _reset()
+	global NEWTON_COUNTER
+	NEWTON_COUNTER = 0
+end
+
+function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    global NEWTON_COUNTER
     f = f_init
+    cache = nothing
     for i in 1:maxiter
-        @info "  - Newton iteration $i"
+	Zygote.ignore() do
+		@info "  - Newton iteration $i: f[1:3]=$(f[1:3])"
+	end
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
+	NEWTON_COUNTER += 1
 
         if isapprox(f, fnew)
-            @info "  + converged"
+	    Zygote.ignore() do
+		    @info "  + converged"
+	    end
             break  # converged
         else
             f = fnew
         end
     end
-    return fnew #, cache
+    return f, cache
 end
 
-function ChainRulesCore.rrule(
-    config::RuleConfig{>:HasReverseMode},
-    ::typeof(newton_inner_loop),
-    K,
-    dist_y_given_f,
-    Y;
-    f_init,
-    maxiter,
-)
-    fopt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
-    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, fopt)
-
-    function newton_pullback(Δfopt)
-        δself = NoTangent()
-        function newton_single_step(K, dist_y_given_f, ys)
-            fnew, cache = _newton_step(dist_y_given_f, ys, K, fopt)
-            return fnew
-        end
+function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    f_opt, _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    return f_opt
+end
+
+function ChainRulesCore.frule((Δself, ΔK, Δdist_y_given_f, Δys), ::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter)
+    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+
+    # f = K grad_log_p_y_given_f(f)
+    # fdot = Kdot grad_log_p_y_given_f(f) + K grad2_log_p_y_given_f(f) fdot
+    # fdot (I - K grad2_log_p_y_given_f(f)) = Kdot grad_log_p_y_given_f(f)
+    # fdot = (I - K grad2_log_p_y_given_f(f))⁻¹ Kdot grad_log_p_y_given_f(f)
+    # (I - K grad2_log_p_y_given_f(f)) = (I + K W) = (√W)⁻¹ (I + √W K √W) √W = (√W)⁻¹ B √W
+    # fdot = (√W)⁻¹ B⁻¹ √W Kdot grad_log_p_y_given_f(f)
+    ∂f_opt = cache.Wsqrt \ (cache.B_ch \ (cache.Wsqrt * (ΔK * cache.d_loglik)))
+
+    @info "Hit frule"
+
+    return f_opt, (∂f_opt, NoTangent())
+end
+
+function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter)
+    @info "Hit rrule"
+    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+
+    # f = K grad_log_p_y_given_f(f)
+    # fdot = Kdot grad_log_p_y_given_f(f) + K grad2_log_p_y_given_f(f) fdot
+    # fdot (I - K grad2_log_p_y_given_f(f)) = Kdot grad_log_p_y_given_f(f)
+    # fdot = (I - K grad2_log_p_y_given_f(f))⁻¹ Kdot grad_log_p_y_given_f(f)
+    # (I - K grad2_log_p_y_given_f(f)) = (I + K W) = (√W)⁻¹ (I + √W K √W) √W = (√W)⁻¹ B √W
+    # fdot = (√W)⁻¹ B⁻¹ √W Kdot grad_log_p_y_given_f(f)
 
-        fopt2, d_fopt = rrule_via_ad(config, newton_single_step, K, dist_y_given_f, Y)
-        return d_fopt(Δfopt)
-        dfopt_dK_times_Δfopt = d_fopt(Δfopt)[2]
+    # ∂f_opt = cache.Wsqrt \ (cache.B_ch \ (cache.Wsqrt * (ΔK * cache.d_loglik)))
+
+    # Re<Δf, fdot> = Re<Δf, Wsqrt\inv B\inv Wsqrt Kdot d_loglik>
+    #              = Re<Wsqrt' Binv' Wsqrt\inv' Δf d_loglik', Kdot >
+    #
+    # ΔK = Wsqrt * cache.B_ch' \ Wsqrt \ Δf_opt * cache.d_loglik'
+
+    function newton_pullback(Δf_opt)
+        ∂self = NoTangent()
+
+        # ΔK = cache.B_ch' \ Δf_opt * cache.d_loglik'
+	dfopt_dK_times_Δfopt = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
         dfopt_dlik_times_Δfopt = @not_implemented(
             "gradient of lml w.r.t. likelihood parameters"
         )
         dfopt_dY_times_Δfopt = @not_implemented("gradient of lml w.r.t. observations")
-        return (δself, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
+        return (∂self, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
     end
 
-    return (fnew, cache), newton_pullback
+    return f_opt, newton_pullback
 end
 
-function laplace_lml(K, dist_y_given_f, Y; f=zeros(length(Y)), maxiter)
-    f_opt, cache = newton_inner_loop(dist_y_given_f, Y, K; f, maxiter)
+#function _newton_inner_loop_dual(::Type{T}, dual_args...) where {T<:Dual}
+#    ȧrgs = (NO_FIELDS,  partials.(dual_args)...)
+#    args = (_newton_inner_loop, value.(dual_args)...)
+#    y, ẏ = frule(ȧrgs, args...)
+#    T(y, ẏ)
+#end
+#
+#function __newton_inner_loop(ϵ1::T1, ϵ2::T2, θ::T3) where {T1,T2,T3}
+#    T = promote_type(T1, T2, T3)
+#    if T <: Dual
+#        _solve_dual(T, ϵ1, ϵ2, θ)
+#    else
+#        _solve(ϵ1, ϵ2, θ)
+#    end
+#end
+
+
+#function ChainRulesCore.rrule(
+#    config::RuleConfig{>:HasReverseMode},
+#    ::typeof(newton_inner_loop),
+#    K,
+#    dist_y_given_f,
+#    Y;
+#    f_init,
+#    maxiter,
+#)
+#    fopt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
+#    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, fopt)
+#
+#    function newton_pullback(Δfopt)
+#        δself = NoTangent()
+#        function newton_single_step(K, dist_y_given_f, ys)
+#            fnew, cache = _newton_step(dist_y_given_f, ys, K, fopt)
+#            return fnew
+#        end
+#
+#        fopt2, d_fopt = rrule_via_ad(config, newton_single_step, K, dist_y_given_f, Y)
+#        return d_fopt(Δfopt)
+#        dfopt_dK_times_Δfopt = d_fopt(Δfopt)[2]
+#
+#        dfopt_dlik_times_Δfopt = @not_implemented(
+#            "gradient of lml w.r.t. likelihood parameters"
+#        )
+#        dfopt_dY_times_Δfopt = @not_implemented("gradient of lml w.r.t. observations")
+#        return (δself, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
+#    end
+#
+#    return (fnew, cache), newton_pullback
+#end
+
+function laplace_lml(K, dist_y_given_f, Y; f_init=zeros(length(Y)), maxiter)
+    f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
+    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
     return _laplace_lml(f_opt, cache)
 end
 
@@ -195,7 +282,7 @@ end
 #    return lml, laplace_lml_pullback
 #end
 
-function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
+function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options, newton_warmstart=true)
     lf = build_latent_gp(theta0)
     f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
 
@@ -205,7 +292,18 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options)
             @info "Hyperparameters: $theta"
         end
         lf = build_latent_gp(theta)
-        lml = laplace_lml!(f, lf(X), Y)
+        #lml = laplace_lml!(f, lf(X), Y)
+	lfX = lf(X)
+	K = cov(lfX.fx)
+	dist_y_given_f = lfX.lik
+        f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init=f, maxiter=100)
+	cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
+        lml = _laplace_lml(f_opt, cache)
+	Zygote.ignore() do
+		if newton_warmstart
+			f .= f_opt
+		end
+	end
         return -lml
     end
 

From 3c91a222d46b62168d2d2c2e9c36bcdfac3e2f12 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 12:19:37 +0300
Subject: [PATCH 10/60] frule bugfix

---
 src/laplace.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index e569d5c5..ae91fec3 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -173,7 +173,7 @@ function ChainRulesCore.frule((Δself, ΔK, Δdist_y_given_f, Δys), ::typeof(ne
 
     @info "Hit frule"
 
-    return f_opt, (∂f_opt, NoTangent())
+    return f_opt, ∂f_opt
 end
 
 function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter)

From 513d34d212a7ff2819dd2977c9d9916b3cc43616 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 16:57:13 +0300
Subject: [PATCH 11/60] clean up comments

---
 src/laplace.jl | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index ae91fec3..9c1dce0e 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -180,31 +180,36 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys
     @info "Hit rrule"
     f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
 
-    # f = K grad_log_p_y_given_f(f)
-    # fdot = Kdot grad_log_p_y_given_f(f) + K grad2_log_p_y_given_f(f) fdot
-    # fdot (I - K grad2_log_p_y_given_f(f)) = Kdot grad_log_p_y_given_f(f)
-    # fdot = (I - K grad2_log_p_y_given_f(f))⁻¹ Kdot grad_log_p_y_given_f(f)
-    # (I - K grad2_log_p_y_given_f(f)) = (I + K W) = (√W)⁻¹ (I + √W K √W) √W = (√W)⁻¹ B √W
-    # fdot = (√W)⁻¹ B⁻¹ √W Kdot grad_log_p_y_given_f(f)
+    # f = K (∇log p(y|f))                               (RW 3.17)
+    # δf = δK (∇log p(y|f)) + K δ(∇log p(y|f))
+    #    = δK (∇log p(y|f)) + K ∇(∇log p(y|f)) δf
+    # δf (I - K ∇∇log p(y|f)) = δK (∇log p(y|f))
+    # δf (I + K W) = δK (∇log p(y|f))
+    # δf = (I + K W)⁻¹ δK (∇log p(y|f))                 (RW 5.24)
+    # (I + K W) = (√W)⁻¹ (I + √W K √W) √W = (√W)⁻¹ B √W
+    # δf = (√W)⁻¹ B⁻¹ √W δK (∇log p(y|f))
 
     # ∂f_opt = cache.Wsqrt \ (cache.B_ch \ (cache.Wsqrt * (ΔK * cache.d_loglik)))
 
-    # Re<Δf, fdot> = Re<Δf, Wsqrt\inv B\inv Wsqrt Kdot d_loglik>
-    #              = Re<Wsqrt' Binv' Wsqrt\inv' Δf d_loglik', Kdot >
+    # Re<Δf, δf> = Re<Δf, Wsqrt\inv B\inv Wsqrt δK d_loglik>
+    #            = Re<Wsqrt' B\inv' Wsqrt\inv' Δf d_loglik', δK>
     #
-    # ΔK = Wsqrt * cache.B_ch' \ Wsqrt \ Δf_opt * cache.d_loglik'
+    # ΔK = Wsqrt' * cache.B_ch' \ Wsqrt' \ Δf_opt * cache.d_loglik'
 
     function newton_pullback(Δf_opt)
         ∂self = NoTangent()
 
-        # ΔK = cache.B_ch' \ Δf_opt * cache.d_loglik'
-	dfopt_dK_times_Δfopt = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
+        # ∂K = df/dK Δf
+	∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
-        dfopt_dlik_times_Δfopt = @not_implemented(
-            "gradient of lml w.r.t. likelihood parameters"
+        ∂dist_y_given_f = @not_implemented(
+            "gradient of Newton's method w.r.t. likelihood parameters"
         )
-        dfopt_dY_times_Δfopt = @not_implemented("gradient of lml w.r.t. observations")
-        return (∂self, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
+
+        ∂Y = @not_implemented("gradient of Newton's method w.r.t. observations")
+
+        return (∂self, ∂K, ∂dist_y_given_f, ∂Y)
+	#return (∂self, ∂K, NoTangent(), NoTangent())
     end
 
     return f_opt, newton_pullback

From 33af436d3bb112befc4ee6bc1db01997795545bc Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 17:30:56 +0300
Subject: [PATCH 12/60] intermediate cleanup (callbacks not tested)

---
 src/laplace.jl | 176 ++++++++++++-------------------------------------
 1 file changed, 41 insertions(+), 135 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 9c1dce0e..c19a2403 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -46,8 +46,6 @@ function _laplace_lml(f, c)
     return -c.a' * f / 2 + c.loglik - sum(log.(diag(c.B_ch.L)))
 end
 
-#function _laplace_lml_deriv()
-
 function laplace_f_cov(cache)
     # (K⁻¹ + W)⁻¹
     # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
@@ -76,21 +74,14 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior)
     K = cov(f_prior)
 
     res_array = []
-    for i in 1:maxiter
-        @info "  - Newton iteration $i"
-        fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
-
-        push!(res_array, LaplaceResult(f, fnew, cache))
-        # TODO don't do all these computations unless we actually want them
 
-        if isapprox(f, fnew)
-            @info "  + converged"
-            break  # converged
-        else
-            f = fnew
-        end
+    function store_result!(fnew, cache)
+        push!(res_array, LaplaceResult(copy(f), fnew, cache))
+        return f .= fnew
     end
 
+    _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init=f, maxiter, callback=store_result!)
+
     return res_array
 end
 
@@ -100,53 +91,22 @@ function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y; kwargs...)
     return f_post
 end
 
-"""
-laplace_lml(f::Vector, lfX::LatentFiniteGP, Y::Vector)
-
-`f` 
-"""
-function laplace_lml!(f, lfX, Y)
-    f_opt = Zygote.ignore() do
-        newt_res = laplace_steps(lfX.lik, lfX.fx, Y; f)
-        f_opt = newt_res[end].f
-        f .= f_opt
-        return f_opt
-    end
-
-    # TODO ideally I wouldn't copy&paste the following lines
-    # but we have to re-compute this outside the Zygote.ignore() to compute gradients
-    cache = _laplace_train_intermediates(lfX.lik, Y, cov(lfX.fx), f_opt)
-    return _laplace_lml(f_opt, cache)
-end
-
-function laplace_lml_nonewton(f_opt, lfX, Y)
-    cache = _laplace_train_intermediates(lfX.lik, Y, cov(lfX.fx), f_opt)
-    return _laplace_lml(f_opt, cache)
-    # d lml / d theta = \partial lml / \partial theta + \partial lml / \partial fhat \partial fhat / \partial theta
-end
-
-NEWTON_COUNTER = 0
-
-function _reset()
-	global NEWTON_COUNTER
-	NEWTON_COUNTER = 0
-end
-
-function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
-    global NEWTON_COUNTER
+function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=nothing)
     f = f_init
     cache = nothing
     for i in 1:maxiter
-	Zygote.ignore() do
-		@info "  - Newton iteration $i: f[1:3]=$(f[1:3])"
-	end
+        Zygote.ignore() do
+            @info "  - Newton iteration $i: f[1:3]=$(f[1:3])"
+        end
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
-	NEWTON_COUNTER += 1
+        if !isnothing(callback)
+            callback(fnew, cache)
+        end
 
         if isapprox(f, fnew)
-	    Zygote.ignore() do
-		    @info "  + converged"
-	    end
+            Zygote.ignore() do
+                @info "  + converged"
+            end
             break  # converged
         else
             f = fnew
@@ -160,7 +120,15 @@ function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
     return f_opt
 end
 
-function ChainRulesCore.frule((Δself, ΔK, Δdist_y_given_f, Δys), ::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter)
+function ChainRulesCore.frule(
+    (Δself, ΔK, Δdist_y_given_f, Δys),
+    ::typeof(newton_inner_loop),
+    K,
+    dist_y_given_f,
+    ys;
+    f_init,
+    maxiter,
+)
     f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
 
     # f = K grad_log_p_y_given_f(f)
@@ -176,7 +144,9 @@ function ChainRulesCore.frule((Δself, ΔK, Δdist_y_given_f, Δys), ::typeof(ne
     return f_opt, ∂f_opt
 end
 
-function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter)
+function ChainRulesCore.rrule(
+    ::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter
+)
     @info "Hit rrule"
     f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
 
@@ -200,7 +170,7 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys
         ∂self = NoTangent()
 
         # ∂K = df/dK Δf
-	∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
+        ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
         ∂dist_y_given_f = @not_implemented(
             "gradient of Newton's method w.r.t. likelihood parameters"
@@ -209,85 +179,21 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys
         ∂Y = @not_implemented("gradient of Newton's method w.r.t. observations")
 
         return (∂self, ∂K, ∂dist_y_given_f, ∂Y)
-	#return (∂self, ∂K, NoTangent(), NoTangent())
+        #return (∂self, ∂K, NoTangent(), NoTangent())
     end
 
     return f_opt, newton_pullback
 end
 
-#function _newton_inner_loop_dual(::Type{T}, dual_args...) where {T<:Dual}
-#    ȧrgs = (NO_FIELDS,  partials.(dual_args)...)
-#    args = (_newton_inner_loop, value.(dual_args)...)
-#    y, ẏ = frule(ȧrgs, args...)
-#    T(y, ẏ)
-#end
-#
-#function __newton_inner_loop(ϵ1::T1, ϵ2::T2, θ::T3) where {T1,T2,T3}
-#    T = promote_type(T1, T2, T3)
-#    if T <: Dual
-#        _solve_dual(T, ϵ1, ϵ2, θ)
-#    else
-#        _solve(ϵ1, ϵ2, θ)
-#    end
-#end
-
-
-#function ChainRulesCore.rrule(
-#    config::RuleConfig{>:HasReverseMode},
-#    ::typeof(newton_inner_loop),
-#    K,
-#    dist_y_given_f,
-#    Y;
-#    f_init,
-#    maxiter,
-#)
-#    fopt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
-#    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, fopt)
-#
-#    function newton_pullback(Δfopt)
-#        δself = NoTangent()
-#        function newton_single_step(K, dist_y_given_f, ys)
-#            fnew, cache = _newton_step(dist_y_given_f, ys, K, fopt)
-#            return fnew
-#        end
-#
-#        fopt2, d_fopt = rrule_via_ad(config, newton_single_step, K, dist_y_given_f, Y)
-#        return d_fopt(Δfopt)
-#        dfopt_dK_times_Δfopt = d_fopt(Δfopt)[2]
-#
-#        dfopt_dlik_times_Δfopt = @not_implemented(
-#            "gradient of lml w.r.t. likelihood parameters"
-#        )
-#        dfopt_dY_times_Δfopt = @not_implemented("gradient of lml w.r.t. observations")
-#        return (δself, dfopt_dK_times_Δfopt, dfopt_dlik_times_Δfopt, dfopt_dY_times_Δfopt)
-#    end
-#
-#    return (fnew, cache), newton_pullback
-#end
-
 function laplace_lml(K, dist_y_given_f, Y; f_init=zeros(length(Y)), maxiter)
     f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
     cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
     return _laplace_lml(f_opt, cache)
 end
 
-#function ChainRulesCore.rrule(::typeof(laplace_lml), K, dist_y_given_f, Y)
-#    newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
-#    f_opt = newt_res[end].f
-#    cache = newt_res[end].cache
-#    lml = _laplace_lml(f_opt, cache)
-#
-#    function laplace_lml_pullback(Δlml)
-#        ..._d(lfX.fx.f)
-#	[..._d("lfx.lik")]
-#        dlml_dlfX = ?
-#        return NoTangent(), dlml_dlfX * Δlml, @not_implemented("gradient of lml @not_implemented("gradient of lml w.r.t. observations")
-#    end
-#
-#    return lml, laplace_lml_pullback
-#end
-
-function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options, newton_warmstart=true)
+function optimize_elbo(
+    build_latent_gp, theta0, X, Y, optimizer, optim_options, newton_warmstart=true
+)
     lf = build_latent_gp(theta0)
     f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
 
@@ -298,17 +204,17 @@ function optimize_elbo(build_latent_gp, theta0, X, Y, optimizer, optim_options,
         end
         lf = build_latent_gp(theta)
         #lml = laplace_lml!(f, lf(X), Y)
-	lfX = lf(X)
-	K = cov(lfX.fx)
-	dist_y_given_f = lfX.lik
+        lfX = lf(X)
+        K = cov(lfX.fx)
+        dist_y_given_f = lfX.lik
         f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init=f, maxiter=100)
-	cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
+        cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
         lml = _laplace_lml(f_opt, cache)
-	Zygote.ignore() do
-		if newton_warmstart
-			f .= f_opt
-		end
-	end
+        Zygote.ignore() do
+            if newton_warmstart
+                f .= f_opt
+            end
+        end
         return -lml
     end
 

From 30b45ac3375ae4a2c13053aebe3a31df606fb8c7 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 17:58:44 +0300
Subject: [PATCH 13/60] fix callback support

---
 src/laplace.jl | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index c19a2403..e8e7fbf4 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -115,8 +115,8 @@ function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=not
     return f, cache
 end
 
-function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
-    f_opt, _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=nothing)
+    f_opt, _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback)
     return f_opt
 end
 
@@ -126,10 +126,9 @@ function ChainRulesCore.frule(
     K,
     dist_y_given_f,
     ys;
-    f_init,
-    maxiter,
+    kwargs...,
 )
-    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; kwargs...)
 
     # f = K grad_log_p_y_given_f(f)
     # fdot = Kdot grad_log_p_y_given_f(f) + K grad2_log_p_y_given_f(f) fdot
@@ -144,11 +143,9 @@ function ChainRulesCore.frule(
     return f_opt, ∂f_opt
 end
 
-function ChainRulesCore.rrule(
-    ::typeof(newton_inner_loop), K, dist_y_given_f, ys; f_init, maxiter
-)
+function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys; kwargs...)
     @info "Hit rrule"
-    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter)
+    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; kwargs...)
 
     # f = K (∇log p(y|f))                               (RW 3.17)
     # δf = δK (∇log p(y|f)) + K δ(∇log p(y|f))
@@ -192,7 +189,14 @@ function laplace_lml(K, dist_y_given_f, Y; f_init=zeros(length(Y)), maxiter)
 end
 
 function optimize_elbo(
-    build_latent_gp, theta0, X, Y, optimizer, optim_options, newton_warmstart=true
+    build_latent_gp,
+    theta0,
+    X,
+    Y,
+    optimizer,
+    optim_options;
+    newton_warmstart=true,
+    newton_callback=nothing,
 )
     lf = build_latent_gp(theta0)
     f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
@@ -203,11 +207,12 @@ function optimize_elbo(
             @info "Hyperparameters: $theta"
         end
         lf = build_latent_gp(theta)
-        #lml = laplace_lml!(f, lf(X), Y)
         lfX = lf(X)
         K = cov(lfX.fx)
         dist_y_given_f = lfX.lik
-        f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init=f, maxiter=100)
+        f_opt = newton_inner_loop(
+            K, dist_y_given_f, Y; f_init=f, maxiter=100, callback=newton_callback
+        )
         cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
         lml = _laplace_lml(f_opt, cache)
         Zygote.ignore() do

From 09e5e82052fe2054e34f238aaaea86514d7ae89c Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 17:59:11 +0300
Subject: [PATCH 14/60] Laplace initial tests

---
 test/Project.toml |  4 +++
 test/laplace.jl   | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl  | 40 +++++++++++++--------
 3 files changed, 119 insertions(+), 14 deletions(-)
 create mode 100644 test/laplace.jl

diff --git a/test/Project.toml b/test/Project.toml
index a9600671..9962367f 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,13 +1,17 @@
 [deps]
 AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
 ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606"
+ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 AbstractGPs = "0.4, 0.5"
diff --git a/test/laplace.jl b/test/laplace.jl
new file mode 100644
index 00000000..d6446b45
--- /dev/null
+++ b/test/laplace.jl
@@ -0,0 +1,89 @@
+function generate_data()
+    Random.seed!(1)
+    X = range(0, 23.5; length=48)
+    fs = @. 3 * sin(10 + 0.6X) + sin(0.1X) - 1
+    # invlink = normcdf
+    invlink = logistic
+    ps = invlink.(fs)
+    Y = [rand(Bernoulli(p)) for p in ps]
+    return X, Y
+end
+
+dist_y_given_f(f) = Bernoulli(logistic(f))
+
+function build_latent_gp(theta)
+    variance = softplus(theta[1])
+    lengthscale = softplus(theta[2])
+    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
+    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
+end
+
+@testset "laplace" begin
+    X, Y = generate_data()
+    theta0 = [0.0, 1.0]
+
+    function objective(theta)
+        lf = build_latent_gp(theta)
+        lfX = lf(X)
+        f_init, K = mean_and_cov(lfX.fx)
+        lml = ApproximateGPs.laplace_lml(K, lfX.lik, Y; f_init, maxiter=100)
+        return -lml
+    end
+
+    @testset "NelderMead" begin
+        expected_thetahat = [7.708967951453345, 1.5182348363613536]
+
+        res = Optim.optimize(objective, theta0, NelderMead(); inplace=false)
+
+        @test res.minimizer ≈ expected_thetahat
+    end
+    @testset "gradient-based" begin
+        expected_thetahat = [7.709076337653239, 1.51820292019697]
+
+        res = Optim.optimize(
+            objective,
+            θ -> only(Zygote.gradient(objective, θ)),
+            theta0,
+            LBFGS();
+            inplace=false,
+        )
+        @info res
+
+        @test res.minimizer ≈ expected_thetahat
+
+        n_newton_coldstart = 0
+        function count_coldstart!(_, _)
+            return n_newton_coldstart += 1
+        end
+
+        _, res_cold = ApproximateGPs.optimize_elbo(
+            build_latent_gp,
+            theta0,
+            X,
+            Y,
+            LBFGS(),
+            Optim.Options(; iterations=1000);
+            newton_warmstart=false,
+            newton_callback=count_coldstart!,
+        )
+
+        n_newton_warmstart = 0
+        function count_warmstart!(_, _)
+            return n_newton_warmstart += 1
+        end
+
+        _, res_warm = ApproximateGPs.optimize_elbo(
+            build_latent_gp,
+            theta0,
+            X,
+            Y,
+            LBFGS(),
+            Optim.Options(; iterations=1000);
+            newton_warmstart=true,
+            newton_callback=count_warmstart!,
+        )
+
+        @test n_newton_warmstart < n_newton_coldstart
+        @test res_cold.minimizer ≈ res_warm.minimizer
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 23301c13..3d9dd9c2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,9 +4,13 @@ using ApproximateGPs
 using Flux
 using IterTools
 using AbstractGPs
+using AbstractGPs: LatentFiniteGP
 using Distributions
+using LogExpFunctions: logistic
 using LinearAlgebra
 using PDMats
+using Optim
+using Zygote
 
 const GROUP = get(ENV, "GROUP", "All")
 const PKGDIR = dirname(dirname(pathof(ApproximateGPs)))
@@ -14,19 +18,27 @@ const PKGDIR = dirname(dirname(pathof(ApproximateGPs)))
 include("test_utils.jl")
 
 @testset "ApproximateGPs" begin
-    include("svgp.jl")
-    println(" ")
-    @info "Ran svgp tests"
+    #include("expected_loglik.jl")
+    #println(" ")
+    #@info "Ran expected_loglik tests"
+    #
+    #@testset "SVGP" begin
+    #    include("svgp.jl")
+    #    println(" ")
+    #    @info "Ran svgp tests"
+    #
+    #    include("elbo.jl")
+    #    println(" ")
+    #    @info "Ran elbo tests"
+    #
+    #    include("equivalences.jl")
+    #    println(" ")
+    #    @info "Ran equivalences tests"
+    #end
 
-    include("elbo.jl")
-    println(" ")
-    @info "Ran elbo tests"
-
-    include("expected_loglik.jl")
-    println(" ")
-    @info "Ran expected_loglik tests"
-
-    include("equivalences.jl")
-    println(" ")
-    @info "Ran equivalences tests"
+    @testset "Laplace" begin
+        include("laplace.jl")
+        println(" ")
+        @info "Ran laplace tests"
+    end
 end

From 2591555bb6bff32100222806ad5be0ea4e0e548d Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:12:45 +0300
Subject: [PATCH 15/60] make argument order consistent

---
 src/laplace.jl  | 48 ++++++++++++++++++++++++++----------------------
 test/laplace.jl |  2 +-
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index e8e7fbf4..d0a8ce7b 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -42,8 +42,9 @@ function _newton_step(dist_y_given_f, ys, K, f)
     return fnew, cache
 end
 
-function _laplace_lml(f, c)
-    return -c.a' * f / 2 + c.loglik - sum(log.(diag(c.B_ch.L)))
+function _laplace_lml(f, cache)
+    # -a' * f / 2 + loglik - sum(log.(diag(B_ch.L)))
+    return -cache.a' * f / 2 + cache.loglik - sum(log.(diag(cache.B_ch.L)))
 end
 
 function laplace_f_cov(cache)
@@ -80,7 +81,7 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior)
         return f .= fnew
     end
 
-    _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init=f, maxiter, callback=store_result!)
+    _ = _newton_inner_loop(dist_y_given_f, ys, K; f_init=f, maxiter, callback=store_result!)
 
     return res_array
 end
@@ -91,7 +92,7 @@ function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y; kwargs...)
     return f_post
 end
 
-function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=nothing)
+function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=nothing)
     f = f_init
     cache = nothing
     for i in 1:maxiter
@@ -115,20 +116,21 @@ function _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=not
     return f, cache
 end
 
-function newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback=nothing)
-    f_opt, _ = _newton_inner_loop(K, dist_y_given_f, ys; f_init, maxiter, callback)
+# Currently, we have a separate function that returns only f_opt to simplify frule/rrule
+function newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=nothing)
+    f_opt, _ = _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback)
     return f_opt
 end
 
 function ChainRulesCore.frule(
-    (Δself, ΔK, Δdist_y_given_f, Δys),
+    (Δself, Δdist_y_given_f, Δys, ΔK),
     ::typeof(newton_inner_loop),
-    K,
     dist_y_given_f,
-    ys;
+    ys,
+    K;
     kwargs...,
 )
-    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; kwargs...)
+    f_opt, cache = _newton_inner_loop(dist_y_given_f, ys, K; kwargs...)
 
     # f = K grad_log_p_y_given_f(f)
     # fdot = Kdot grad_log_p_y_given_f(f) + K grad2_log_p_y_given_f(f) fdot
@@ -143,9 +145,9 @@ function ChainRulesCore.frule(
     return f_opt, ∂f_opt
 end
 
-function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys; kwargs...)
+function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K; kwargs...)
     @info "Hit rrule"
-    f_opt, cache = _newton_inner_loop(K, dist_y_given_f, ys; kwargs...)
+    f_opt, cache = _newton_inner_loop(dist_y_given_f, ys, K; kwargs...)
 
     # f = K (∇log p(y|f))                               (RW 3.17)
     # δf = δK (∇log p(y|f)) + K δ(∇log p(y|f))
@@ -166,25 +168,27 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), K, dist_y_given_f, ys
     function newton_pullback(Δf_opt)
         ∂self = NoTangent()
 
-        # ∂K = df/dK Δf
-        ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
-
         ∂dist_y_given_f = @not_implemented(
             "gradient of Newton's method w.r.t. likelihood parameters"
         )
 
         ∂Y = @not_implemented("gradient of Newton's method w.r.t. observations")
 
-        return (∂self, ∂K, ∂dist_y_given_f, ∂Y)
-        #return (∂self, ∂K, NoTangent(), NoTangent())
+        # ∂K = df/dK Δf
+        ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
+
+        return (∂self, ∂dist_y_given_f, ∂Y, ∂K)
+        #return (∂self, NoTangent(), NoTangent(), ∂K)
     end
 
     return f_opt, newton_pullback
 end
 
-function laplace_lml(K, dist_y_given_f, Y; f_init=zeros(length(Y)), maxiter)
-    f_opt = newton_inner_loop(K, dist_y_given_f, Y; f_init, maxiter)
-    cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
+function laplace_lml(
+    dist_y_given_f, ys, K; f_init=zeros(length(Y)), maxiter, newton_kwargs...
+)
+    f_opt = newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, newton_kwargs...)
+    cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
     return _laplace_lml(f_opt, cache)
 end
 
@@ -211,7 +215,7 @@ function optimize_elbo(
         K = cov(lfX.fx)
         dist_y_given_f = lfX.lik
         f_opt = newton_inner_loop(
-            K, dist_y_given_f, Y; f_init=f, maxiter=100, callback=newton_callback
+            dist_y_given_f, Y, K; f_init=f, maxiter=100, callback=newton_callback
         )
         cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
         lml = _laplace_lml(f_opt, cache)
@@ -242,7 +246,7 @@ function optimize_elbo(
 end
 
 struct LaplacePosteriorGP{Tprior,Tdata} <: AbstractGPs.AbstractGP
-    prior::Tprior  # this is lfx.fx; should we store lfx itself (including lik) instead?
+    prior::Tprior  # TODO: this is lfx.fx; should we store lfx itself (including lik) instead?
     data::Tdata
 end
 
diff --git a/test/laplace.jl b/test/laplace.jl
index d6446b45..d4a1ce69 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -26,7 +26,7 @@ end
         lf = build_latent_gp(theta)
         lfX = lf(X)
         f_init, K = mean_and_cov(lfX.fx)
-        lml = ApproximateGPs.laplace_lml(K, lfX.lik, Y; f_init, maxiter=100)
+        lml = ApproximateGPs.laplace_lml(lfX.lik, Y, K; f_init, maxiter=100)
         return -lml
     end
 

From 5d58f5da134b989065e39d1973144df15703a9d8 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:19:27 +0300
Subject: [PATCH 16/60] more cleanup

---
 src/laplace.jl  | 120 +++++++++++++++++++++++++-----------------------
 test/laplace.jl |   3 +-
 2 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index d0a8ce7b..c585a1a0 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -47,51 +47,6 @@ function _laplace_lml(f, cache)
     return -cache.a' * f / 2 + cache.loglik - sum(log.(diag(cache.B_ch.L)))
 end
 
-function laplace_f_cov(cache)
-    # (K⁻¹ + W)⁻¹
-    # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
-    # = (√W⁻¹) (√W⁻¹ K⁻¹ √W⁻¹ + I)⁻¹ (√W⁻¹)
-    # ; (I + C⁻¹)⁻¹ = I - (I + C)⁻¹
-    # = (√W⁻¹) (I - (I + √W K √W)⁻¹) (√W⁻¹)
-    # = (√W⁻¹) (I - B⁻¹) (√W⁻¹)
-    B_ch = cache.B_ch
-    Wsqrt_inv = inv(cache.Wsqrt)
-    return Wsqrt_inv * (I - inv(B_ch)) * Wsqrt_inv
-end
-
-function LaplaceResult(f, fnew, cache)
-    # TODO should we use fnew?
-    f_cov = laplace_f_cov(cache)
-    q = MvNormal(f, AbstractGPs._symmetric(f_cov))
-    lml_approx = _laplace_lml(f, cache)
-
-    return (; f, f_cov, q, lml_approx, cache)
-end
-
-function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior))
-    @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
-    @assert length(ys) == length(f_prior) == length(f)
-
-    K = cov(f_prior)
-
-    res_array = []
-
-    function store_result!(fnew, cache)
-        push!(res_array, LaplaceResult(copy(f), fnew, cache))
-        return f .= fnew
-    end
-
-    _ = _newton_inner_loop(dist_y_given_f, ys, K; f_init=f, maxiter, callback=store_result!)
-
-    return res_array
-end
-
-function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, Y; kwargs...)
-    newt_res = laplace_steps(lfX.lik, lfX.fx, Y; kwargs...)
-    f_post = LaplacePosteriorGP(lfX.fx, newt_res[end])
-    return f_post
-end
-
 function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=nothing)
     f = f_init
     cache = nothing
@@ -172,12 +127,12 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K
             "gradient of Newton's method w.r.t. likelihood parameters"
         )
 
-        ∂Y = @not_implemented("gradient of Newton's method w.r.t. observations")
+        ∂ys = @not_implemented("gradient of Newton's method w.r.t. observations")
 
         # ∂K = df/dK Δf
         ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
-        return (∂self, ∂dist_y_given_f, ∂Y, ∂K)
+        return (∂self, ∂dist_y_given_f, ∂ys, ∂K)
         #return (∂self, NoTangent(), NoTangent(), ∂K)
     end
 
@@ -185,18 +140,20 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K
 end
 
 function laplace_lml(
-    dist_y_given_f, ys, K; f_init=zeros(length(Y)), maxiter, newton_kwargs...
+    dist_y_given_f, ys, K; f_init=zeros(length(ys)), maxiter, newton_kwargs...
 )
     f_opt = newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, newton_kwargs...)
     cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
     return _laplace_lml(f_opt, cache)
 end
 
+
+
 function optimize_elbo(
     build_latent_gp,
     theta0,
     X,
-    Y,
+    ys,
     optimizer,
     optim_options;
     newton_warmstart=true,
@@ -215,9 +172,9 @@ function optimize_elbo(
         K = cov(lfX.fx)
         dist_y_given_f = lfX.lik
         f_opt = newton_inner_loop(
-            dist_y_given_f, Y, K; f_init=f, maxiter=100, callback=newton_callback
+            dist_y_given_f, ys, K; f_init=f, maxiter=100, callback=newton_callback
         )
-        cache = _laplace_train_intermediates(dist_y_given_f, Y, K, f_opt)
+        cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
         lml = _laplace_lml(f_opt, cache)
         Zygote.ignore() do
             if newton_warmstart
@@ -241,13 +198,62 @@ function optimize_elbo(
     #)
 
     lf = build_latent_gp(training_results.minimizer)
-    f_post = laplace_posterior(lf(X), Y; f)
+
+    f_post = laplace_posterior(lf(X), ys; f)
     return f_post, training_results
 end
 
-struct LaplacePosteriorGP{Tprior,Tdata} <: AbstractGPs.AbstractGP
+
+
+function laplace_f_cov(cache)
+    # (K⁻¹ + W)⁻¹
+    # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
+    # = (√W⁻¹) (√W⁻¹ K⁻¹ √W⁻¹ + I)⁻¹ (√W⁻¹)
+    # ; (I + C⁻¹)⁻¹ = I - (I + C)⁻¹
+    # = (√W⁻¹) (I - (I + √W K √W)⁻¹) (√W⁻¹)
+    # = (√W⁻¹) (I - B⁻¹) (√W⁻¹)
+    B_ch = cache.B_ch
+    Wsqrt_inv = inv(cache.Wsqrt)
+    return Wsqrt_inv * (I - inv(B_ch)) * Wsqrt_inv
+end
+
+function LaplaceResult(f, fnew, cache)
+    # TODO should we use fnew?
+    f_cov = laplace_f_cov(cache)
+    q = MvNormal(f, AbstractGPs._symmetric(f_cov))
+    lml_approx = _laplace_lml(f, cache)
+
+    return (; f, f_cov, q, lml_approx, cache)
+end
+
+function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior))
+    @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
+    @assert length(ys) == length(f_prior) == length(f)
+
+    K = cov(f_prior)
+
+    res_array = []
+
+    function store_result!(fnew, cache)
+        push!(res_array, LaplaceResult(copy(f), fnew, cache))
+        return f .= fnew
+    end
+
+    _ = _newton_inner_loop(dist_y_given_f, ys, K; f_init=f, maxiter, callback=store_result!)
+
+    return res_array
+end
+
+function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, ys; kwargs...)
+    newt_res = laplace_steps(lfX.lik, lfX.fx, ys; kwargs...)
+    cache = newt_res[end].cache
+    f_post = LaplacePosteriorGP(lfX.fx, cache)
+    return f_post
+end
+
+struct LaplacePosteriorGP{Tprior,Tcache} <: AbstractGPs.AbstractGP
     prior::Tprior  # TODO: this is lfx.fx; should we store lfx itself (including lik) instead?
-    data::Tdata
+    cache::Tcache
 end
 
 function _laplace_predict_intermediates(cache, prior_at_x, xnew)
@@ -259,19 +265,19 @@ function _laplace_predict_intermediates(cache, prior_at_x, xnew)
 end
 
 function StatsBase.mean_and_var(f::LaplacePosteriorGP, x::AbstractVector)
-    f_mean, v = _laplace_predict_intermediates(f.data.cache, f.prior, x)
+    f_mean, v = _laplace_predict_intermediates(f.cache, f.prior, x)
     f_var = var(f.prior.f, x) - vec(sum(v .^ 2; dims=1))
     return f_mean, f_var
 end
 
 function StatsBase.mean_and_cov(f::LaplacePosteriorGP, x::AbstractVector)
-    f_mean, v = _laplace_predict_intermediates(f.data.cache, f.prior, x)
+    f_mean, v = _laplace_predict_intermediates(f.cache, f.prior, x)
     f_cov = cov(f.prior.f, x) - v' * v
     return f_mean, f_cov
 end
 
 function Statistics.mean(f::LaplacePosteriorGP, x::AbstractVector)
-    d_loglik = f.data.cache.d_loglik
+    d_loglik = f.cache.d_loglik
     return mean(f.prior.f, x) + cov(f.prior.f, f.prior.x, x)' * d_loglik
 end
 
diff --git a/test/laplace.jl b/test/laplace.jl
index d4a1ce69..e7d9f0ed 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -83,7 +83,8 @@ end
             newton_callback=count_warmstart!,
         )
 
-        @test n_newton_warmstart < n_newton_coldstart
+        @info "Newton steps: $n_newton_coldstart (coldstart) vs $n_newton_warmstart (warmstart)"
+        @test n_newton_coldstart - n_newton_warmstart > 100
         @test res_cold.minimizer ≈ res_warm.minimizer
     end
 end

From 6c4e56db20d5c93a0f37a207e84c4da9246e43e2 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:22:44 +0300
Subject: [PATCH 17/60] cleanup

---
 src/laplace.jl | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index c585a1a0..22ffca9d 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -147,8 +147,6 @@ function laplace_lml(
     return _laplace_lml(f_opt, cache)
 end
 
-
-
 function optimize_elbo(
     build_latent_gp,
     theta0,
@@ -169,10 +167,8 @@ function optimize_elbo(
         end
         lf = build_latent_gp(theta)
         lfX = lf(X)
-        K = cov(lfX.fx)
-        dist_y_given_f = lfX.lik
         f_opt = newton_inner_loop(
-            dist_y_given_f, ys, K; f_init=f, maxiter=100, callback=newton_callback
+            lfX.lik, ys, cov(lfX.fx); f_init=f, maxiter=100, callback=newton_callback
         )
         cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
         lml = _laplace_lml(f_opt, cache)
@@ -203,8 +199,6 @@ function optimize_elbo(
     return f_post, training_results
 end
 
-
-
 function laplace_f_cov(cache)
     # (K⁻¹ + W)⁻¹
     # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)

From ead526f701f20d9034b964ae64e2a1af99498d4e Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:48:27 +0300
Subject: [PATCH 18/60] cleanup

---
 src/laplace.jl | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 22ffca9d..dd137eae 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -139,18 +139,29 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K
     return f_opt, newton_pullback
 end
 
+function laplace_lml(dist_y_given_f, ys, K, f_opt)
+    cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
+    return _laplace_lml(f_opt, cache)
+end
+
 function laplace_lml(
     dist_y_given_f, ys, K; f_init=zeros(length(ys)), maxiter, newton_kwargs...
 )
     f_opt = newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, newton_kwargs...)
-    cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
-    return _laplace_lml(f_opt, cache)
+    return laplace_lml(dist_y_given_f, ys, K, f_opt)
+end
+
+function laplace_f_and_lml(lfx::LatentFiniteGP, ys; newton_kwargs...)
+    K = cov(lfx.fx)
+    f_opt = newton_inner_loop(lfx.lik, ys, K; newton_kwargs...)
+    lml = laplace_lml(lfx.lik, ys, K, f_opt)
+    return f_opt, lml
 end
 
 function optimize_elbo(
     build_latent_gp,
     theta0,
-    X,
+    xs,
     ys,
     optimizer,
     optim_options;
@@ -158,7 +169,7 @@ function optimize_elbo(
     newton_callback=nothing,
 )
     lf = build_latent_gp(theta0)
-    f = mean(lf(X).fx)  # will be mutated in-place to "warm-start" the Newton steps
+    f = mean(lf(xs).fx)  # will be mutated in-place to "warm-start" the Newton steps
 
     function objective(theta)
         Zygote.ignore() do
@@ -166,12 +177,9 @@ function optimize_elbo(
             @info "Hyperparameters: $theta"
         end
         lf = build_latent_gp(theta)
-        lfX = lf(X)
-        f_opt = newton_inner_loop(
-            lfX.lik, ys, cov(lfX.fx); f_init=f, maxiter=100, callback=newton_callback
+        f_opt, lml = laplace_f_and_lml(
+            lf(xs), ys; f_init=f, maxiter=100, callback=newton_callback
         )
-        cache = _laplace_train_intermediates(dist_y_given_f, ys, K, f_opt)
-        lml = _laplace_lml(f_opt, cache)
         Zygote.ignore() do
             if newton_warmstart
                 f .= f_opt
@@ -188,14 +196,10 @@ function optimize_elbo(
         optim_options;
         inplace=false,
     )
-    #training_results = Optim.optimize(
-    #    objective, theta0, optimizer, optim_options;
-    #    inplace=false,
-    #)
 
     lf = build_latent_gp(training_results.minimizer)
 
-    f_post = laplace_posterior(lf(X), ys; f)
+    f_post = laplace_posterior(lf(xs), ys; f)
     return f_post, training_results
 end
 

From fe7662670a0e8ce92a128db8460d6dfb06ae56cd Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:48:43 +0300
Subject: [PATCH 19/60] @info -> @debug and ChainRulesTestUtil workaround

---
 src/laplace.jl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index dd137eae..647eda42 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -52,7 +52,7 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
     cache = nothing
     for i in 1:maxiter
         Zygote.ignore() do
-            @info "  - Newton iteration $i: f[1:3]=$(f[1:3])"
+            @debug "  - Newton iteration $i: f[1:3]=$(f[1:3])"
         end
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
         if !isnothing(callback)
@@ -61,7 +61,7 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
 
         if isapprox(f, fnew)
             Zygote.ignore() do
-                @info "  + converged"
+                @debug "  + converged"
             end
             break  # converged
         else
@@ -95,13 +95,13 @@ function ChainRulesCore.frule(
     # fdot = (√W)⁻¹ B⁻¹ √W Kdot grad_log_p_y_given_f(f)
     ∂f_opt = cache.Wsqrt \ (cache.B_ch \ (cache.Wsqrt * (ΔK * cache.d_loglik)))
 
-    @info "Hit frule"
+    @debug "Hit frule"
 
     return f_opt, ∂f_opt
 end
 
 function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K; kwargs...)
-    @info "Hit rrule"
+    @debug "Hit rrule"
     f_opt, cache = _newton_inner_loop(dist_y_given_f, ys, K; kwargs...)
 
     # f = K (∇log p(y|f))                               (RW 3.17)
@@ -132,8 +132,8 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K
         # ∂K = df/dK Δf
         ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
-        return (∂self, ∂dist_y_given_f, ∂ys, ∂K)
-        #return (∂self, NoTangent(), NoTangent(), ∂K)
+        #return (∂self, ∂dist_y_given_f, ∂ys, ∂K)
+        return (∂self, NoTangent(), NoTangent(), ∂K)  # workaround until https://github.com/JuliaDiff/ChainRulesTestUtils.jl/pull/218 is merged
     end
 
     return f_opt, newton_pullback
@@ -173,8 +173,8 @@ function optimize_elbo(
 
     function objective(theta)
         Zygote.ignore() do
-            # Zygote does not like the try/catch within @info
-            @info "Hyperparameters: $theta"
+            # Zygote does not like the try/catch within @info etc.
+            @debug "Hyperparameters: $theta"
         end
         lf = build_latent_gp(theta)
         f_opt, lml = laplace_f_and_lml(

From f7439d60451c7cfb4e311a4c7b37db085e453d7b Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Tue, 21 Sep 2021 18:48:59 +0300
Subject: [PATCH 20/60] chainrule tests

---
 test/laplace.jl  | 20 ++++++++++++++++++--
 test/runtests.jl |  1 +
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index e7d9f0ed..acdfbbd3 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -18,7 +18,22 @@ function build_latent_gp(theta)
     return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
 end
 
-@testset "laplace" begin
+@testset "chainrule" begin
+    xs = [0.2, 0.3, 0.7]
+    ys = [1, 1, 0]
+    K = kernelmatrix(with_lengthscale(Matern52Kernel(), 0.3), xs)
+    test_frule(
+        ApproximateGPs.newton_inner_loop,
+        dist_y_given_f,
+        ys,
+        K;
+        fkwargs=(; f_init=zero(ys), maxiter=100),
+        rtol=0.01,
+    )
+    #test_rrule(ApproximateGPs.newton_inner_loop, dist_y_given_f, ys, K; fkwargs=(;f_init=zero(ys), maxiter=100), rtol=0.05)  # my rrule might still be broken
+end
+
+@testset "optimization" begin
     X, Y = generate_data()
     theta0 = [0.0, 1.0]
 
@@ -34,6 +49,7 @@ end
         expected_thetahat = [7.708967951453345, 1.5182348363613536]
 
         res = Optim.optimize(objective, theta0, NelderMead(); inplace=false)
+        #@info res
 
         @test res.minimizer ≈ expected_thetahat
     end
@@ -47,7 +63,7 @@ end
             LBFGS();
             inplace=false,
         )
-        @info res
+        #@info res
 
         @test res.minimizer ≈ expected_thetahat
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 3d9dd9c2..1bd8b555 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,6 +11,7 @@ using LinearAlgebra
 using PDMats
 using Optim
 using Zygote
+using ChainRulesTestUtils
 
 const GROUP = get(ENV, "GROUP", "All")
 const PKGDIR = dirname(dirname(pathof(ApproximateGPs)))

From d71ca207d1befc8032c6201a3435de0c1cc87498 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Wed, 22 Sep 2021 10:08:40 +0300
Subject: [PATCH 21/60] add @info for res_cold/res_warm

---
 test/laplace.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/laplace.jl b/test/laplace.jl
index acdfbbd3..1380c42b 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -82,6 +82,7 @@ end
             newton_warmstart=false,
             newton_callback=count_coldstart!,
         )
+        @info "Coldstart:\n$res_cold"
 
         n_newton_warmstart = 0
         function count_warmstart!(_, _)
@@ -98,6 +99,7 @@ end
             newton_warmstart=true,
             newton_callback=count_warmstart!,
         )
+        @info "Warmstart:\n$res_warm"
 
         @info "Newton steps: $n_newton_coldstart (coldstart) vs $n_newton_warmstart (warmstart)"
         @test n_newton_coldstart - n_newton_warmstart > 100

From ecb29d9690c9faf6f574e71d4aaaa414ee58e39e Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Wed, 22 Sep 2021 12:00:52 +0300
Subject: [PATCH 22/60] cleanup

---
 src/laplace.jl | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 647eda42..f2262d60 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -63,6 +63,7 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
             Zygote.ignore() do
                 @debug "  + converged"
             end
+            #f = fnew
             break  # converged
         else
             f = fnew
@@ -145,19 +146,38 @@ function laplace_lml(dist_y_given_f, ys, K, f_opt)
 end
 
 function laplace_lml(
-    dist_y_given_f, ys, K; f_init=zeros(length(ys)), maxiter, newton_kwargs...
+    dist_y_given_f, ys, K; f_init=zeros(length(ys)), maxiter=100, newton_kwargs...
 )
     f_opt = newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, newton_kwargs...)
     return laplace_lml(dist_y_given_f, ys, K, f_opt)
 end
 
+function laplace_lml(lfx::LatentFiniteGP, ys; newton_kwargs...)
+    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)
+    return laplace_lml(dist_y_given_f, ys, K; newton_kwargs...)
+end
+
 function laplace_f_and_lml(lfx::LatentFiniteGP, ys; newton_kwargs...)
-    K = cov(lfx.fx)
-    f_opt = newton_inner_loop(lfx.lik, ys, K; newton_kwargs...)
-    lml = laplace_lml(lfx.lik, ys, K, f_opt)
+    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)
+    f_opt = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
+    lml = laplace_lml(dist_y_given_f, ys, K, f_opt)
     return f_opt, lml
 end
 
+function _check_laplace_inputs(
+    lfx::LatentFiniteGP, ys; f_init=nothing, maxiter=100, newton_kwargs...
+)
+    fx = lfx.fx
+    @assert mean(fx) == zero(mean(fx))  # might work with non-zero prior mean but not checked
+    @assert length(ys) == length(fx)
+    dist_y_given_f = lfx.lik
+    K = cov(fx)
+    if isnothing(f_init)
+        f_init = mean(fx)
+    end
+    return dist_y_given_f, K, (; f_init, maxiter, newton_kwargs...)
+end
+
 function optimize_elbo(
     build_latent_gp,
     theta0,
@@ -199,7 +219,7 @@ function optimize_elbo(
 
     lf = build_latent_gp(training_results.minimizer)
 
-    f_post = laplace_posterior(lf(xs), ys; f)
+    f_post = laplace_posterior(lf(xs), ys; f_init=f)
     return f_post, training_results
 end
 
@@ -242,10 +262,10 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior)
     return res_array
 end
 
-function laplace_posterior(lfX::AbstractGPs.LatentFiniteGP, ys; kwargs...)
-    newt_res = laplace_steps(lfX.lik, lfX.fx, ys; kwargs...)
-    cache = newt_res[end].cache
-    f_post = LaplacePosteriorGP(lfX.fx, cache)
+function laplace_posterior(lfx::AbstractGPs.LatentFiniteGP, ys; newton_kwargs...)
+    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)
+    _, cache = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
+    f_post = LaplacePosteriorGP(lfx.fx, cache)
     return f_post
 end
 

From b2753327aa3acbec77df0055e202f70aeb76fc25 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Wed, 22 Sep 2021 12:01:32 +0300
Subject: [PATCH 23/60] explicit FiniteDifferences gradient test on laplace_lml

---
 test/Project.toml |  1 +
 test/laplace.jl   | 19 +++++++++++++++++++
 test/runtests.jl  |  1 +
 3 files changed, 21 insertions(+)

diff --git a/test/Project.toml b/test/Project.toml
index 9962367f..6e541c11 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -3,6 +3,7 @@ AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
 ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/test/laplace.jl b/test/laplace.jl
index 1380c42b..66516ec8 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -18,6 +18,25 @@ function build_latent_gp(theta)
     return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
 end
 
+@testset "Gaussian" begin
+    # should check for convergence in one step, and agreement with exact GPR
+end
+
+@testset "gradients" begin
+    X, Y = generate_data()
+    @testset "laplace_lml" begin
+        theta0 = rand(2)
+        function objective(theta)
+            lf = build_latent_gp(theta)
+            lml = ApproximateGPs.laplace_lml(lf(X), Y)
+            return -lml
+        end
+        fd_grad = only(FiniteDifferences.grad(central_fdm(5, 1), objective, theta0))
+        ad_grad = only(Zygote.gradient(objective, theta0))
+        @test ad_grad ≈ fd_grad
+    end
+end
+
 @testset "chainrule" begin
     xs = [0.2, 0.3, 0.7]
     ys = [1, 1, 0]
diff --git a/test/runtests.jl b/test/runtests.jl
index 1bd8b555..8c881eb3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,6 +12,7 @@ using PDMats
 using Optim
 using Zygote
 using ChainRulesTestUtils
+using FiniteDifferences
 
 const GROUP = get(ENV, "GROUP", "All")
 const PKGDIR = dirname(dirname(pathof(ApproximateGPs)))

From 0d8eb02dc3406268e6b901bac64c47bfca0cf110 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Wed, 22 Sep 2021 16:06:46 +0300
Subject: [PATCH 24/60] format

---
 demo_laplace.jl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/demo_laplace.jl b/demo_laplace.jl
index 9959b664..82ad1d58 100644
--- a/demo_laplace.jl
+++ b/demo_laplace.jl
@@ -9,7 +9,7 @@ using ApproximateGPs
 
 Random.seed!(1)
 X = range(0, 23.5; length=48)
-fs = @. 3*sin(10 + 0.6X) + sin(0.1X) - 1
+fs = @. 3 * sin(10 + 0.6X) + sin(0.1X) - 1
 # invlink = normcdf
 invlink = logistic
 ps = invlink.(fs)
@@ -18,7 +18,7 @@ Y = [rand(Bernoulli(p)) for p in ps]
 function plot_data()
     plot()
     plot!(X, ps)
-    scatter!(X, Y)
+    return scatter!(X, Y)
 end
 
 dist_y_given_f(f) = Bernoulli(invlink(f))
@@ -32,7 +32,7 @@ end
 
 function plot_samples!(Xgrid, fpost; samples=100, color=2)
     fsamples = rand(fpost(Xgrid, 1e-8), samples)
-    plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
+    return plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
 end
 
 using Zygote
@@ -50,7 +50,9 @@ lfX = lf(X)
 
 newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
 
-f_post, opt_res = ApproximateGPs.optimize_elbo(build_latent_gp, theta0, X, Y, NelderMead(), optim_options)
+f_post, opt_res = ApproximateGPs.optimize_elbo(
+    build_latent_gp, theta0, X, Y, NelderMead(), optim_options
+)
 
 theta1 = opt_res.minimizer
 
@@ -74,7 +76,6 @@ using FiniteDifferences
 
 FiniteDifferences.grad(central_fdm(5, 1), full_objective, theta0)
 
-
 function comp_lml(theta)
     _lf = build_latent_gp(theta)
     K = kernelmatrix(_lf.f.kernel, X)

From af07036833cc1159771f405b32595c13e0fd9c77 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Wed, 22 Sep 2021 16:07:47 +0300
Subject: [PATCH 25/60] format

---
 src/ep.jl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/ep.jl b/src/ep.jl
index f29d9551..45528431 100644
--- a/src/ep.jl
+++ b/src/ep.jl
@@ -86,7 +86,7 @@ function ep_single_site_update(ep_problem, ep_state, i::Int)
     alik_i = epsite_dist(ep_state.sites[i].q)
     cav_i = div_dist(q_fi, alik_i)
     qhat_i = moment_match(cav_i, ep_problem.lik_evals[i])
-    new_t = div_dist(qhat_i.q, cav_i)
+    return new_t = div_dist(qhat_i.q, cav_i)
     #delta_eta = 
     #new_q = rank_one_update(ep_state.q, 
 
@@ -102,12 +102,12 @@ function EPResult(results)
     return (; results)
 end
 
-function ep_steps(dist_y_given_f, f_prior, y; maxiter = 100)
+function ep_steps(dist_y_given_f, f_prior, y; maxiter=100)
     f = mean(f_prior)
     @assert f == zero(f)  # might work with non-zero prior mean but not checked
     converged = false
     res_array = []
-    for i = 1:maxiter
+    for i in 1:maxiter
         results = ep_step!(f, dist_y_given_f, f_prior, y)
         push!(res_array, EPResult(results))
         if isapprox(f, results.fnew)
@@ -118,4 +118,3 @@ function ep_steps(dist_y_given_f, f_prior, y; maxiter = 100)
     end
     return res_array
 end
-

From 6f08e73cc61bc38d11462061dc6c6b2891c7038b Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 11:32:21 +0300
Subject: [PATCH 26/60] remove Zygote dependency - part 1

---
 src/laplace.jl | 56 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index f2262d60..5ecbd10f 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -1,3 +1,7 @@
+# workaround for https://github.com/JuliaDiff/ChainRulesCore.jl/issues/470 to avoid Zygote dependency
+ignore_ad(closure) = closure()
+@non_differentiable ignore_ad(closure)
+
 function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     # Ψ = log p(y|f) + log p(f)
     #   = loglik + log p(f)
@@ -51,7 +55,7 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
     f = f_init
     cache = nothing
     for i in 1:maxiter
-        Zygote.ignore() do
+        ignore_ad() do
             @debug "  - Newton iteration $i: f[1:3]=$(f[1:3])"
         end
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
@@ -60,7 +64,7 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
         end
 
         if isapprox(f, fnew)
-            Zygote.ignore() do
+            ignore_ad() do
                 @debug "  + converged"
             end
             #f = fnew
@@ -178,36 +182,62 @@ function _check_laplace_inputs(
     return dist_y_given_f, K, (; f_init, maxiter, newton_kwargs...)
 end
 
-function optimize_elbo(
+function build_laplace_objective!(
+    f,
     build_latent_gp,
-    theta0,
     xs,
-    ys,
-    optimizer,
-    optim_options;
+    ys;
     newton_warmstart=true,
     newton_callback=nothing,
+    newton_maxiter=100,
 )
-    lf = build_latent_gp(theta0)
-    f = mean(lf(xs).fx)  # will be mutated in-place to "warm-start" the Newton steps
+    initialize_f = true
 
     function objective(theta)
-        Zygote.ignore() do
+        lf = build_latent_gp(theta)
+        lfx = lf(xs)
+        ignore_ad() do
             # Zygote does not like the try/catch within @info etc.
             @debug "Hyperparameters: $theta"
+            if initialize_f
+                f .= mean(lfx.fx)
+            end
         end
-        lf = build_latent_gp(theta)
         f_opt, lml = laplace_f_and_lml(
-            lf(xs), ys; f_init=f, maxiter=100, callback=newton_callback
+            lfx, ys; f_init=f, maxiter=newton_maxiter, callback=newton_callback
         )
-        Zygote.ignore() do
+        ignore_ad() do
             if newton_warmstart
                 f .= f_opt
+                initialize_f = false
             end
         end
         return -lml
     end
 
+    return objective
+end
+
+function build_laplace_objective(build_latent_gp, xs, ys; kwargs...)
+    f = similar(xs, length(xs))  # will be mutated in-place to "warm-start" the Newton steps
+    return build_laplace_objective!(f, build_latent_gp, xs, ys; kwargs...)
+end
+
+function optimize_elbo(
+    build_latent_gp,
+    theta0,
+    xs,
+    ys,
+    optimizer,
+    optim_options;
+    newton_warmstart=true,
+    newton_callback=nothing,
+)
+    f = similar(xs, length(xs))  # will be mutated in-place to "warm-start" the Newton steps
+    objective = build_laplace_objective!(
+        f, build_latent_gp, xs, ys; newton_warmstart, newton_callback
+    )
+
     training_results = Optim.optimize(
         objective,
         θ -> only(Zygote.gradient(objective, θ)),

From f84ed86971f549c4a8df3ea7f6016a5e9093b41d Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 11:59:12 +0300
Subject: [PATCH 27/60] remove Zygote dependency - part 2

---
 Project.toml          |  1 -
 src/ApproximateGPs.jl |  4 +++-
 src/laplace.jl        | 30 ------------------------------
 test/Project.toml     |  5 +++++
 test/laplace.jl       | 40 +++++++++++++++++++++++++++++++++-------
 5 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/Project.toml b/Project.toml
index 5f2cb9ff..d288f89d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,7 +19,6 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 AbstractGPs = "0.3, 0.4, 0.5"
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index fce4db97..d6511802 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -27,7 +27,9 @@ using ForwardDiff
 using QuadGK
 using Optim
 
-export laplace_steps, laplace_posterior, optimize_elbo
+export laplace_lml, build_laplace_objective, build_laplace_objective!
+export laplace_posterior
+export laplace_steps
 include("laplace.jl")
 include("ep.jl")
 
diff --git a/src/laplace.jl b/src/laplace.jl
index 5ecbd10f..dffcaf5f 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -223,36 +223,6 @@ function build_laplace_objective(build_latent_gp, xs, ys; kwargs...)
     return build_laplace_objective!(f, build_latent_gp, xs, ys; kwargs...)
 end
 
-function optimize_elbo(
-    build_latent_gp,
-    theta0,
-    xs,
-    ys,
-    optimizer,
-    optim_options;
-    newton_warmstart=true,
-    newton_callback=nothing,
-)
-    f = similar(xs, length(xs))  # will be mutated in-place to "warm-start" the Newton steps
-    objective = build_laplace_objective!(
-        f, build_latent_gp, xs, ys; newton_warmstart, newton_callback
-    )
-
-    training_results = Optim.optimize(
-        objective,
-        θ -> only(Zygote.gradient(objective, θ)),
-        theta0,
-        optimizer,
-        optim_options;
-        inplace=false,
-    )
-
-    lf = build_latent_gp(training_results.minimizer)
-
-    f_post = laplace_posterior(lf(xs), ys; f_init=f)
-    return f_post, training_results
-end
-
 function laplace_f_cov(cache)
     # (K⁻¹ + W)⁻¹
     # = (√W⁻¹) (√W⁻¹ (K⁻¹ + W) √W⁻¹)⁻¹ (√W⁻¹)
diff --git a/test/Project.toml b/test/Project.toml
index 3890db74..368f12a3 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -17,7 +17,12 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 [compat]
 AbstractGPs = "0.4, 0.5"
 ApproximateGPs = "0.1"
+ChainRulesTestUtils = "1"
 Distributions = "0.25"
+FiniteDifferences = "0.12"
 Flux = "0.12"
 IterTools = "1"
+LogExpFunctions = "0.3"
+Optim = "1"
 PDMats = "0.11"
+Zygote = "0.6"
diff --git a/test/laplace.jl b/test/laplace.jl
index 66516ec8..59f88809 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -18,6 +18,35 @@ function build_latent_gp(theta)
     return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
 end
 
+function optimize_elbo(
+    build_latent_gp,
+    theta0,
+    xs,
+    ys,
+    optimizer,
+    optim_options;
+    newton_warmstart=true,
+    newton_callback=nothing,
+)
+    f = similar(xs, length(xs))  # will be mutated in-place to "warm-start" the Newton steps
+    objective = build_laplace_objective!(
+        f, build_latent_gp, xs, ys; newton_warmstart, newton_callback
+    )
+
+    training_results = Optim.optimize(
+        objective,
+        θ -> only(Zygote.gradient(objective, θ)),
+        theta0,
+        optimizer,
+        optim_options;
+        inplace=false,
+    )
+
+    lf = build_latent_gp(training_results.minimizer)
+    f_post = laplace_posterior(lf(xs), ys; f_init=f)
+    return f_post, training_results
+end
+
 @testset "Gaussian" begin
     # should check for convergence in one step, and agreement with exact GPR
 end
@@ -28,7 +57,7 @@ end
         theta0 = rand(2)
         function objective(theta)
             lf = build_latent_gp(theta)
-            lml = ApproximateGPs.laplace_lml(lf(X), Y)
+            lml = laplace_lml(lf(X), Y)
             return -lml
         end
         fd_grad = only(FiniteDifferences.grad(central_fdm(5, 1), objective, theta0))
@@ -58,10 +87,7 @@ end
 
     function objective(theta)
         lf = build_latent_gp(theta)
-        lfX = lf(X)
-        f_init, K = mean_and_cov(lfX.fx)
-        lml = ApproximateGPs.laplace_lml(lfX.lik, Y, K; f_init, maxiter=100)
-        return -lml
+        return -laplace_lml(lf(X), Y)
     end
 
     @testset "NelderMead" begin
@@ -91,7 +117,7 @@ end
             return n_newton_coldstart += 1
         end
 
-        _, res_cold = ApproximateGPs.optimize_elbo(
+        _, res_cold = optimize_elbo(
             build_latent_gp,
             theta0,
             X,
@@ -108,7 +134,7 @@ end
             return n_newton_warmstart += 1
         end
 
-        _, res_warm = ApproximateGPs.optimize_elbo(
+        _, res_warm = optimize_elbo(
             build_latent_gp,
             theta0,
             X,

From e4aab7e0c597be9980f63756693ac1ba768e2258 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 12:00:45 +0300
Subject: [PATCH 28/60] pkg bugfix

---
 Project.toml          | 1 -
 src/ApproximateGPs.jl | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/Project.toml b/Project.toml
index d288f89d..f72eb1ac 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,7 +13,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 KLDivergences = "3c9cd921-3d3f-41e2-830c-e020174918cc"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index d6511802..67944665 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -22,10 +22,8 @@ include("svgp.jl")
 include("expected_loglik.jl")
 include("elbo.jl")
 
-using Zygote
 using ForwardDiff
-using QuadGK
-using Optim
+using QuadGK  # TODO replace with FastGaussQuadrature
 
 export laplace_lml, build_laplace_objective, build_laplace_objective!
 export laplace_posterior

From d19003fef93b537980accf6a795068c7bc2b6e0f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:15:52 +0300
Subject: [PATCH 29/60] update example manifests

---
 examples/a-regression/Manifest.toml     |  84 +++++++++---------
 examples/b-classification/Manifest.toml | 110 ++++++++++++------------
 2 files changed, 101 insertions(+), 93 deletions(-)

diff --git a/examples/a-regression/Manifest.toml b/examples/a-regression/Manifest.toml
index 913565cb..bf1717ca 100644
--- a/examples/a-regression/Manifest.toml
+++ b/examples/a-regression/Manifest.toml
@@ -24,19 +24,19 @@ uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 version = "3.3.1"
 
 [[ApproximateGPs]]
-deps = ["AbstractGPs", "ChainRulesCore", "Distributions", "FastGaussQuadrature", "FillArrays", "GPLikelihoods", "KLDivergences", "LinearAlgebra", "Reexport", "SpecialFunctions", "Statistics", "StatsBase"]
+deps = ["AbstractGPs", "ChainRulesCore", "Distributions", "FastGaussQuadrature", "FillArrays", "ForwardDiff", "GPLikelihoods", "KLDivergences", "LinearAlgebra", "QuadGK", "Reexport", "SpecialFunctions", "Statistics", "StatsBase"]
 path = "../.."
 uuid = "298c2ebc-0411-48ad-af38-99e88101b606"
-version = "0.1.0"
+version = "0.1.1"
 
 [[ArgTools]]
 uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
 
 [[ArrayInterface]]
-deps = ["IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
-git-tree-sha1 = "d84c956c4c0548b4caf0e4e96cf5b6494b5b1529"
+deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
+git-tree-sha1 = "b8d49c34c3da35f220e7295659cd0bab8e739fed"
 uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "3.1.32"
+version = "3.1.33"
 
 [[Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -81,9 +81,9 @@ version = "1.11.5"
 
 [[ChainRulesCore]]
 deps = ["Compat", "LinearAlgebra", "SparseArrays"]
-git-tree-sha1 = "4ce9393e871aca86cc457d9f66976c3da6902ea7"
+git-tree-sha1 = "bd4afa1fdeec0c8b89dad3c6e92bc6e3b0fec9ce"
 uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-version = "1.4.0"
+version = "1.6.0"
 
 [[CodecZlib]]
 deps = ["TranscodingStreams", "Zlib_jll"]
@@ -117,9 +117,9 @@ version = "0.3.0"
 
 [[Compat]]
 deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "4866e381721b30fac8dda4c8cb1d9db45c8d2994"
+git-tree-sha1 = "1a90210acd935f222ea19657f143004d2c2a1117"
 uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "3.37.0"
+version = "3.38.0"
 
 [[CompilerSupportLibraries_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -137,9 +137,9 @@ uuid = "d38c429a-6771-53c6-b99e-75d170b6e991"
 version = "0.5.7"
 
 [[DataAPI]]
-git-tree-sha1 = "bec2532f8adb82005476c141ec23e921fc20971b"
+git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8"
 uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.8.0"
+version = "1.9.0"
 
 [[DataStructures]]
 deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
@@ -168,9 +168,9 @@ version = "1.0.3"
 
 [[DiffRules]]
 deps = ["NaNMath", "Random", "SpecialFunctions"]
-git-tree-sha1 = "3ed8fa7178a10d1cd0f1ca524f249ba6937490c0"
+git-tree-sha1 = "7220bc21c33e990c14f4a9a319b1d242ebc5b269"
 uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
-version = "1.3.0"
+version = "1.3.1"
 
 [[Distances]]
 deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
@@ -235,9 +235,9 @@ version = "0.4.7"
 
 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
-git-tree-sha1 = "caf289224e622f518c9dbfe832cdafa17d7c80a6"
+git-tree-sha1 = "7f6ad1a7f4621b4ab8e554133dade99ebc6e7221"
 uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
-version = "0.12.4"
+version = "0.12.5"
 
 [[FixedPointNumbers]]
 deps = ["Statistics"]
@@ -300,9 +300,9 @@ version = "0.2.0"
 
 [[GPUArrays]]
 deps = ["Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
-git-tree-sha1 = "8fac1cf7d6ce0f2249c7acaf25d22e1e85c4a07f"
+git-tree-sha1 = "7c39d767a9c55fafd01f7bc8b3fd0adf175fbc97"
 uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
-version = "8.0.2"
+version = "8.1.0"
 
 [[GPUCompiler]]
 deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
@@ -312,9 +312,9 @@ version = "0.12.9"
 
 [[GR]]
 deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"]
-git-tree-sha1 = "182da592436e287758ded5be6e32c406de3a2e47"
+git-tree-sha1 = "c2178cfbc0a5a552e16d097fae508f2024de61a3"
 uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
-version = "0.58.1"
+version = "0.59.0"
 
 [[GR_jll]]
 deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"]
@@ -449,9 +449,9 @@ version = "3.100.1+0"
 
 [[LLVM]]
 deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
-git-tree-sha1 = "8fb1a675d1b51885a78bc980fbf1944279880f97"
+git-tree-sha1 = "36d95ecdfbc3240d728f68d73064d5b097fbf2ef"
 uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
-version = "4.5.1"
+version = "4.5.2"
 
 [[LLVMExtra_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -635,6 +635,10 @@ git-tree-sha1 = "7937eda4681660b4d6aeeecc2f7e1c81c8ee4e2f"
 uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051"
 version = "1.3.5+0"
 
+[[OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+
 [[OpenSSL_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
 git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a"
@@ -672,9 +676,9 @@ version = "0.11.1"
 
 [[Parsers]]
 deps = ["Dates"]
-git-tree-sha1 = "438d35d2d95ae2c5e8780b330592b6de8494e779"
+git-tree-sha1 = "9d8c00ef7a8d110787ff6f170579846f776133a9"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "2.0.3"
+version = "2.0.4"
 
 [[Pixman_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -700,9 +704,9 @@ version = "1.0.14"
 
 [[Plots]]
 deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
-git-tree-sha1 = "2dbafeadadcf7dadff20cd60046bba416b4912be"
+git-tree-sha1 = "457b13497a3ea4deb33d273a6a5ea15c25c0ebd9"
 uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-version = "1.21.3"
+version = "1.22.2"
 
 [[Preferences]]
 deps = ["TOML"]
@@ -726,9 +730,9 @@ version = "5.15.3+0"
 
 [[QuadGK]]
 deps = ["DataStructures", "LinearAlgebra"]
-git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b"
+git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
 uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
-version = "2.4.1"
+version = "2.4.2"
 
 [[REPL]]
 deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
@@ -757,9 +761,9 @@ version = "1.1.2"
 
 [[RecipesPipeline]]
 deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"]
-git-tree-sha1 = "d4491becdc53580c6dadb0f6249f90caae888554"
+git-tree-sha1 = "7ad0dfa8d03b7bcf8c597f59f5292801730c55b8"
 uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c"
-version = "0.4.0"
+version = "0.4.1"
 
 [[Reexport]]
 git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
@@ -820,10 +824,10 @@ deps = ["LinearAlgebra", "Random"]
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [[SpecialFunctions]]
-deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
-git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
+deps = ["ChainRulesCore", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
+git-tree-sha1 = "ad42c30a6204c74d264692e633133dcea0e8b14e"
 uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "1.6.1"
+version = "1.6.2"
 
 [[Static]]
 deps = ["IfElse"]
@@ -860,9 +864,9 @@ version = "0.9.10"
 
 [[StructArrays]]
 deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
-git-tree-sha1 = "f41020e84127781af49fc12b7e92becd7f5dd0ba"
+git-tree-sha1 = "2ce41e0d042c60ecd131e9fb7154a3bfadbf50d3"
 uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
-version = "0.6.2"
+version = "0.6.3"
 
 [[SuiteSparse]]
 deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
@@ -900,9 +904,9 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[TimerOutputs]]
 deps = ["ExprTools", "Printf"]
-git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
+git-tree-sha1 = "7cb456f358e8f9d102a8b25e8dfedf58fa5689bc"
 uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
-version = "0.5.12"
+version = "0.5.13"
 
 [[TranscodingStreams]]
 deps = ["Random", "Test"]
@@ -1074,9 +1078,9 @@ version = "1.4.0+3"
 
 [[ZipFile]]
 deps = ["Libdl", "Printf", "Zlib_jll"]
-git-tree-sha1 = "c3a5637e27e914a7a445b8d0ad063d701931e9f7"
+git-tree-sha1 = "3593e69e469d2111389a9bd06bac1f3d730ac6de"
 uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
-version = "0.9.3"
+version = "0.9.4"
 
 [[Zlib_jll]]
 deps = ["Libdl"]
@@ -1090,9 +1094,9 @@ version = "1.5.0+0"
 
 [[Zygote]]
 deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
-git-tree-sha1 = "ffbf36ba9cd8476347486a013c93590b910a4855"
+git-tree-sha1 = "4b799addc63aa77ad4112cede8086564d9068511"
 uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
-version = "0.6.21"
+version = "0.6.22"
 
 [[ZygoteRules]]
 deps = ["MacroTools"]
diff --git a/examples/b-classification/Manifest.toml b/examples/b-classification/Manifest.toml
index 6adb84dd..c596c51e 100644
--- a/examples/b-classification/Manifest.toml
+++ b/examples/b-classification/Manifest.toml
@@ -19,10 +19,10 @@ uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 version = "3.3.1"
 
 [[ApproximateGPs]]
-deps = ["AbstractGPs", "ChainRulesCore", "Distributions", "FastGaussQuadrature", "FillArrays", "GPLikelihoods", "KLDivergences", "LinearAlgebra", "Reexport", "SpecialFunctions", "Statistics", "StatsBase"]
+deps = ["AbstractGPs", "ChainRulesCore", "Distributions", "FastGaussQuadrature", "FillArrays", "ForwardDiff", "GPLikelihoods", "KLDivergences", "LinearAlgebra", "QuadGK", "Reexport", "SpecialFunctions", "Statistics", "StatsBase"]
 path = "../.."
 uuid = "298c2ebc-0411-48ad-af38-99e88101b606"
-version = "0.1.0"
+version = "0.1.1"
 
 [[ArgCheck]]
 git-tree-sha1 = "dedbbb2ddb876f899585c4ec4433265e3017215a"
@@ -33,10 +33,10 @@ version = "2.1.0"
 uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
 
 [[ArrayInterface]]
-deps = ["IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
-git-tree-sha1 = "d84c956c4c0548b4caf0e4e96cf5b6494b5b1529"
+deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
+git-tree-sha1 = "b8d49c34c3da35f220e7295659cd0bab8e739fed"
 uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "3.1.32"
+version = "3.1.33"
 
 [[Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -82,9 +82,9 @@ version = "1.11.5"
 
 [[ChainRulesCore]]
 deps = ["Compat", "LinearAlgebra", "SparseArrays"]
-git-tree-sha1 = "4ce9393e871aca86cc457d9f66976c3da6902ea7"
+git-tree-sha1 = "bd4afa1fdeec0c8b89dad3c6e92bc6e3b0fec9ce"
 uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-version = "1.4.0"
+version = "1.6.0"
 
 [[CloseOpenIntervals]]
 deps = ["ArrayInterface", "Static"]
@@ -123,9 +123,9 @@ version = "0.3.0"
 
 [[Compat]]
 deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "4866e381721b30fac8dda4c8cb1d9db45c8d2994"
+git-tree-sha1 = "1a90210acd935f222ea19657f143004d2c2a1117"
 uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "3.37.0"
+version = "3.38.0"
 
 [[CompilerSupportLibraries_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -149,9 +149,9 @@ uuid = "d38c429a-6771-53c6-b99e-75d170b6e991"
 version = "0.5.7"
 
 [[DataAPI]]
-git-tree-sha1 = "bec2532f8adb82005476c141ec23e921fc20971b"
+git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8"
 uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.8.0"
+version = "1.9.0"
 
 [[DataStructures]]
 deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
@@ -180,9 +180,9 @@ version = "1.0.3"
 
 [[DiffRules]]
 deps = ["NaNMath", "Random", "SpecialFunctions"]
-git-tree-sha1 = "3ed8fa7178a10d1cd0f1ca524f249ba6937490c0"
+git-tree-sha1 = "7220bc21c33e990c14f4a9a319b1d242ebc5b269"
 uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
-version = "1.3.0"
+version = "1.3.1"
 
 [[Distances]]
 deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
@@ -242,9 +242,9 @@ version = "0.4.7"
 
 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
-git-tree-sha1 = "caf289224e622f518c9dbfe832cdafa17d7c80a6"
+git-tree-sha1 = "7f6ad1a7f4621b4ab8e554133dade99ebc6e7221"
 uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
-version = "0.12.4"
+version = "0.12.5"
 
 [[FiniteDiff]]
 deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
@@ -311,9 +311,9 @@ version = "0.2.0"
 
 [[GR]]
 deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"]
-git-tree-sha1 = "182da592436e287758ded5be6e32c406de3a2e47"
+git-tree-sha1 = "c2178cfbc0a5a552e16d097fae508f2024de61a3"
 uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
-version = "0.58.1"
+version = "0.59.0"
 
 [[GR_jll]]
 deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"]
@@ -581,9 +581,9 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 
 [[LoopVectorization]]
 deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "Requires", "SLEEFPirates", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"]
-git-tree-sha1 = "d469fcf148475a74c221f14d42ee75da7ccb3b4e"
+git-tree-sha1 = "4804192466f4d370ca19c9957dfb3d919e6ef77e"
 uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
-version = "0.12.73"
+version = "0.12.77"
 
 [[MacroTools]]
 deps = ["Markdown", "Random"]
@@ -648,15 +648,15 @@ uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
 
 [[NonlinearSolve]]
 deps = ["ArrayInterface", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"]
-git-tree-sha1 = "35585534c0c79c161241f2e65e759a11a79d25d0"
+git-tree-sha1 = "e9ffc92217b8709e0cf7b8808f6223a4a0936c95"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
-version = "0.3.10"
+version = "0.3.11"
 
 [[OffsetArrays]]
 deps = ["Adapt"]
-git-tree-sha1 = "c870a0d713b51e4b49be6432eff0e26a4325afee"
+git-tree-sha1 = "c0e9e582987d36d5a61e650e6e543b9e44d9914b"
 uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
-version = "1.10.6"
+version = "1.10.7"
 
 [[Ogg_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -664,6 +664,10 @@ git-tree-sha1 = "7937eda4681660b4d6aeeecc2f7e1c81c8ee4e2f"
 uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051"
 version = "1.3.5+0"
 
+[[OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+
 [[OpenSSL_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
 git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a"
@@ -713,15 +717,15 @@ version = "0.3.8"
 
 [[Parameters]]
 deps = ["OrderedCollections", "UnPack"]
-git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
+git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
 uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
-version = "0.12.2"
+version = "0.12.3"
 
 [[Parsers]]
 deps = ["Dates"]
-git-tree-sha1 = "438d35d2d95ae2c5e8780b330592b6de8494e779"
+git-tree-sha1 = "9d8c00ef7a8d110787ff6f170579846f776133a9"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "2.0.3"
+version = "2.0.4"
 
 [[Pixman_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -747,15 +751,15 @@ version = "1.0.14"
 
 [[Plots]]
 deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
-git-tree-sha1 = "2dbafeadadcf7dadff20cd60046bba416b4912be"
+git-tree-sha1 = "457b13497a3ea4deb33d273a6a5ea15c25c0ebd9"
 uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-version = "1.21.3"
+version = "1.22.2"
 
 [[Polyester]]
-deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"]
-git-tree-sha1 = "21d8a7163d0f3972ade36ca2b5a0e8a27ac96842"
+deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"]
+git-tree-sha1 = "74d358e649e0450cb5d3ff54ca7c8d806ed62765"
 uuid = "f517fe37-dbe3-4b94-8317-1923a5111588"
-version = "0.4.4"
+version = "0.5.1"
 
 [[PolyesterWeave]]
 deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"]
@@ -787,9 +791,9 @@ version = "5.15.3+0"
 
 [[QuadGK]]
 deps = ["DataStructures", "LinearAlgebra"]
-git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b"
+git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
 uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
-version = "2.4.1"
+version = "2.4.2"
 
 [[REPL]]
 deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
@@ -806,9 +810,9 @@ version = "1.1.2"
 
 [[RecipesPipeline]]
 deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"]
-git-tree-sha1 = "d4491becdc53580c6dadb0f6249f90caae888554"
+git-tree-sha1 = "7ad0dfa8d03b7bcf8c597f59f5292801730c55b8"
 uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c"
-version = "0.4.0"
+version = "0.4.1"
 
 [[RecursiveArrayTools]]
 deps = ["ArrayInterface", "ChainRulesCore", "DocStringExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"]
@@ -855,15 +859,15 @@ version = "0.1.0"
 
 [[SLEEFPirates]]
 deps = ["IfElse", "Static", "VectorizationBase"]
-git-tree-sha1 = "947491c30d4293bebb00781bcaf787ba09e7c20d"
+git-tree-sha1 = "2e8150c7d2a14ac68537c7aac25faa6577aff046"
 uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
-version = "0.6.26"
+version = "0.6.27"
 
 [[SciMLBase]]
 deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "RecipesBase", "RecursiveArrayTools", "StaticArrays", "Statistics", "Tables", "TreeViews"]
-git-tree-sha1 = "ff686e0c79dbe91767f4c1e44257621a5455b1c6"
+git-tree-sha1 = "91e29a2bb257a4b992c48f35084064578b87d364"
 uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
-version = "1.18.7"
+version = "1.19.0"
 
 [[Scratch]]
 deps = ["Dates"]
@@ -876,9 +880,9 @@ uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
 [[Setfield]]
 deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
-git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
+git-tree-sha1 = "def0718ddbabeb5476e51e5a43609bee889f285d"
 uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
-version = "0.7.1"
+version = "0.8.0"
 
 [[SharedArrays]]
 deps = ["Distributed", "Mmap", "Random", "Serialization"]
@@ -904,10 +908,10 @@ deps = ["LinearAlgebra", "Random"]
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [[SpecialFunctions]]
-deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
-git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
+deps = ["ChainRulesCore", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
+git-tree-sha1 = "ad42c30a6204c74d264692e633133dcea0e8b14e"
 uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "1.6.1"
+version = "1.6.2"
 
 [[Static]]
 deps = ["IfElse"]
@@ -950,9 +954,9 @@ version = "0.2.4"
 
 [[StructArrays]]
 deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
-git-tree-sha1 = "f41020e84127781af49fc12b7e92becd7f5dd0ba"
+git-tree-sha1 = "2ce41e0d042c60ecd131e9fb7154a3bfadbf50d3"
 uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
-version = "0.6.2"
+version = "0.6.3"
 
 [[SuiteSparse]]
 deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
@@ -1002,9 +1006,9 @@ version = "0.3.0"
 
 [[TriangularSolve]]
 deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"]
-git-tree-sha1 = "1eed054a58d9332adc731103fe47dad2ad1a0adf"
+git-tree-sha1 = "ed55426a514db35f58d36c3812aae89cfc057401"
 uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
-version = "0.1.5"
+version = "0.1.6"
 
 [[URIs]]
 git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355"
@@ -1025,9 +1029,9 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
 [[VectorizationBase]]
 deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "Hwloc", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"]
-git-tree-sha1 = "43c605e008ac67adb672ef08721d4720dfe2ad41"
+git-tree-sha1 = "3e2385f4ec895e694c24a1d5aba58cb6d27cf8b6"
 uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
-version = "0.21.7"
+version = "0.21.10"
 
 [[Wayland_jll]]
 deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
@@ -1191,9 +1195,9 @@ version = "1.5.0+0"
 
 [[Zygote]]
 deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
-git-tree-sha1 = "ffbf36ba9cd8476347486a013c93590b910a4855"
+git-tree-sha1 = "4b799addc63aa77ad4112cede8086564d9068511"
 uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
-version = "0.6.21"
+version = "0.6.22"
 
 [[ZygoteRules]]
 deps = ["MacroTools"]

From 2d43113f8509fe3dd6f9335cc8ec1e96237a2256 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:30:19 +0300
Subject: [PATCH 30/60] fix chainrule test by evaluating frule/rrule on
 newton_inner_loop based on L -> K=L'L

---
 test/laplace.jl | 60 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index 59f88809..edbbe00b 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -69,16 +69,62 @@ end
 @testset "chainrule" begin
     xs = [0.2, 0.3, 0.7]
     ys = [1, 1, 0]
-    K = kernelmatrix(with_lengthscale(Matern52Kernel(), 0.3), xs)
-    test_frule(
-        ApproximateGPs.newton_inner_loop,
+    L = randn(3, 3)
+
+    function newton_inner_loop_from_L(dist_y_given_f, ys, L; kwargs...)
+        K = L'L
+        return ApproximateGPs.newton_inner_loop(dist_y_given_f, ys, K; kwargs...)
+    end
+
+    function ChainRulesCore.frule(
+        (Δself, Δdist_y_given_f, Δys, ΔL),
+        ::typeof(newton_inner_loop_from_L),
         dist_y_given_f,
         ys,
-        K;
-        fkwargs=(; f_init=zero(ys), maxiter=100),
-        rtol=0.01,
+        L;
+        kwargs...,
+    )
+        K = L'L
+        # K̇ = L̇'L + L'L̇
+        ΔK = ΔL'L + L'ΔL
+        return frule(
+            (Δself, Δdist_y_given_f, Δys, ΔK),
+            ApproximateGPs.newton_inner_loop,
+            dist_y_given_f,
+            ys,
+            K;
+            kwargs...,
+        )
+    end
+
+    function ChainRulesCore.rrule(
+        ::typeof(newton_inner_loop_from_L), dist_y_given_f, ys, L; kwargs...
     )
-    #test_rrule(ApproximateGPs.newton_inner_loop, dist_y_given_f, ys, K; fkwargs=(;f_init=zero(ys), maxiter=100), rtol=0.05)  # my rrule might still be broken
+        K = L'L
+        f_opt, newton_from_K_pullback = rrule(
+            ApproximateGPs.newton_inner_loop, dist_y_given_f, ys, K; kwargs...
+        )
+
+        function newton_from_L_pullback(Δf_opt)
+            (∂self, ∂dist_y_given_f, ∂ys, ∂K) = newton_from_K_pullback(Δf_opt)
+            # Re⟨K̄, K̇⟩ = Re⟨K̄, L̇'L + L'L̇⟩
+            # = Re⟨K̄, L̇'L⟩ + Re⟨K̄, L'L̇⟩
+            # = Re⟨K̄L', L̇'⟩ + Re⟨LK̄, L̇⟩
+            # = Re⟨LK̄', L̇⟩ + Re⟨LK̄, L̇⟩
+            # = Re⟨LK̄' + LK̄, L̇⟩
+            # = Re⟨L̄, L̇⟩
+            # L̄ = L(K̄' + K̄)
+            ∂L = @thunk(L * (∂K' + ∂K))
+
+            return (∂self, ∂dist_y_given_f, ∂ys, ∂L)
+        end
+
+        return f_opt, newton_from_L_pullback
+    end
+
+    fkwargs = (; f_init=zero(ys), maxiter=100)
+    test_frule(newton_inner_loop_from_L, dist_y_given_f, ys, L; fkwargs)
+    test_rrule(newton_inner_loop_from_L, dist_y_given_f, ys, L; fkwargs)
 end
 
 @testset "optimization" begin

From bbd24ecd65bc3ca0767818721d535403288aec3c Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:32:28 +0300
Subject: [PATCH 31/60] add compat

---
 Project.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Project.toml b/Project.toml
index f72eb1ac..631e032e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -25,8 +25,10 @@ ChainRulesCore = "1"
 Distributions = "0.25"
 FastGaussQuadrature = "0.4"
 FillArrays = "0.12"
+ForwardDiff = "0.10"
 GPLikelihoods = "0.1, 0.2"
 KLDivergences = "0.2.1"
+QuadGK = "2"
 Reexport = "1"
 SpecialFunctions = "1"
 StatsBase = "0.33"

From ac6486d87541718f0dbfe720fb1b03eee73bc2b7 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:43:31 +0300
Subject: [PATCH 32/60] clean up test

---
 test/laplace.jl | 74 +++++++++++++++++++------------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index edbbe00b..c7f3cd49 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -131,66 +131,50 @@ end
     X, Y = generate_data()
     theta0 = [0.0, 1.0]
 
-    function objective(theta)
-        lf = build_latent_gp(theta)
-        return -laplace_lml(lf(X), Y)
-    end
+    @testset "reference optimum" begin
+        function objective(theta)
+            lf = build_latent_gp(theta)
+            return -laplace_lml(lf(X), Y)
+        end
+
+        @testset "NelderMead" begin
+            expected_thetahat = [7.708967951453345, 1.5182348363613536]
+
+            res = Optim.optimize(objective, theta0, NelderMead())
+            #@info res
 
-    @testset "NelderMead" begin
-        expected_thetahat = [7.708967951453345, 1.5182348363613536]
+            @test res.minimizer ≈ expected_thetahat
+        end
+
+        @testset "gradient-based" begin
+            expected_thetahat = [7.709076337653239, 1.51820292019697]
 
-        res = Optim.optimize(objective, theta0, NelderMead(); inplace=false)
-        #@info res
+            objective_grad(θ) = only(Zygote.gradient(objective, θ))
+            res = Optim.optimize(objective, objective_grad, theta0, LBFGS(); inplace=false)
+            #@info res
 
-        @test res.minimizer ≈ expected_thetahat
+            @test res.minimizer ≈ expected_thetahat
+        end
     end
-    @testset "gradient-based" begin
-        expected_thetahat = [7.709076337653239, 1.51820292019697]
-
-        res = Optim.optimize(
-            objective,
-            θ -> only(Zygote.gradient(objective, θ)),
-            theta0,
-            LBFGS();
-            inplace=false,
-        )
-        #@info res
 
-        @test res.minimizer ≈ expected_thetahat
+    @testset "warmstart vs coldstart" begin
+        args = (build_latent_gp, theta0, X, Y, LBFGS(), Optim.Options(; iterations=1000))
 
         n_newton_coldstart = 0
-        function count_coldstart!(_, _)
-            return n_newton_coldstart += 1
-        end
+        count_coldstart!(_, _) = (n_newton_coldstart += 1)
 
         _, res_cold = optimize_elbo(
-            build_latent_gp,
-            theta0,
-            X,
-            Y,
-            LBFGS(),
-            Optim.Options(; iterations=1000);
-            newton_warmstart=false,
-            newton_callback=count_coldstart!,
+            args...; newton_warmstart=false, newton_callback=count_coldstart!
         )
-        @info "Coldstart:\n$res_cold"
+        #@info "Coldstart:\n$res_cold"
 
         n_newton_warmstart = 0
-        function count_warmstart!(_, _)
-            return n_newton_warmstart += 1
-        end
+        count_warmstart!(_, _) = (n_newton_warmstart += 1)
 
         _, res_warm = optimize_elbo(
-            build_latent_gp,
-            theta0,
-            X,
-            Y,
-            LBFGS(),
-            Optim.Options(; iterations=1000);
-            newton_warmstart=true,
-            newton_callback=count_warmstart!,
+            args...; newton_warmstart=true, newton_callback=count_warmstart!
         )
-        @info "Warmstart:\n$res_warm"
+        #@info "Warmstart:\n$res_warm"
 
         @info "Newton steps: $n_newton_coldstart (coldstart) vs $n_newton_warmstart (warmstart)"
         @test n_newton_coldstart - n_newton_warmstart > 100

From 96957e3b36234dae949756978f859139471b360f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:47:26 +0300
Subject: [PATCH 33/60] add missing CRC dependency

---
 test/Project.toml | 2 ++
 test/runtests.jl  | 1 +
 2 files changed, 3 insertions(+)

diff --git a/test/Project.toml b/test/Project.toml
index 368f12a3..0dd7cfc5 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
 ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
@@ -17,6 +18,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 [compat]
 AbstractGPs = "0.4, 0.5"
 ApproximateGPs = "0.1"
+ChainRulesCore = "1"
 ChainRulesTestUtils = "1"
 Distributions = "0.25"
 FiniteDifferences = "0.12"
diff --git a/test/runtests.jl b/test/runtests.jl
index 8c881eb3..c9c4f91f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,6 +11,7 @@ using LinearAlgebra
 using PDMats
 using Optim
 using Zygote
+using ChainRulesCore
 using ChainRulesTestUtils
 using FiniteDifferences
 

From 5af23faec708083437bef4432ab2733a3880cae2 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 15:34:17 +0300
Subject: [PATCH 34/60] remove workaround

---
 src/laplace.jl    | 3 +--
 test/Project.toml | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index dffcaf5f..a3650050 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -137,8 +137,7 @@ function ChainRulesCore.rrule(::typeof(newton_inner_loop), dist_y_given_f, ys, K
         # ∂K = df/dK Δf
         ∂K = @thunk(cache.Wsqrt * (cache.B_ch \ (cache.Wsqrt \ Δf_opt)) * cache.d_loglik')
 
-        #return (∂self, ∂dist_y_given_f, ∂ys, ∂K)
-        return (∂self, NoTangent(), NoTangent(), ∂K)  # workaround until https://github.com/JuliaDiff/ChainRulesTestUtils.jl/pull/218 is merged
+        return (∂self, ∂dist_y_given_f, ∂ys, ∂K)
     end
 
     return f_opt, newton_pullback
diff --git a/test/Project.toml b/test/Project.toml
index 0dd7cfc5..f8c698e4 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -19,7 +19,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 AbstractGPs = "0.4, 0.5"
 ApproximateGPs = "0.1"
 ChainRulesCore = "1"
-ChainRulesTestUtils = "1"
+ChainRulesTestUtils = "1.2.3"
 Distributions = "0.25"
 FiniteDifferences = "0.12"
 Flux = "0.12"

From 54acbe3ce88db9d74a6651521fdd22758e710e9c Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 15:56:41 +0300
Subject: [PATCH 35/60] use more of AbstractGPs API

---
 src/ApproximateGPs.jl |  5 +++--
 src/laplace.jl        | 24 +++++++++++++++++-------
 test/laplace.jl       |  2 +-
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 67944665..3d4824fc 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -25,9 +25,10 @@ include("elbo.jl")
 using ForwardDiff
 using QuadGK  # TODO replace with FastGaussQuadrature
 
+export LaplaceApproximation
 export laplace_lml, build_laplace_objective, build_laplace_objective!
-export laplace_posterior
-export laplace_steps
+export approx_lml  # TODO move to AbstractGPs, see https://github.com/JuliaGaussianProcesses/AbstractGPs.jl/issues/221
+export laplace_steps  # TODO clean up/discard
 include("laplace.jl")
 include("ep.jl")
 
diff --git a/src/laplace.jl b/src/laplace.jl
index a3650050..32cceddb 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -261,17 +261,27 @@ function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior)
     return res_array
 end
 
-function laplace_posterior(lfx::AbstractGPs.LatentFiniteGP, ys; newton_kwargs...)
-    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)
+struct LaplaceApproximation{Tkw}
+    newton_kwargs::Tkw
+end
+
+LaplaceApproximation(; newton_kwargs...) = LaplaceApproximation(newton_kwargs)
+
+function approx_lml(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
+    return laplace_lml(lfx, ys; la.newton_kwargs...)
+end
+
+function AbstractGPs.posterior(
+    la::LaplaceApproximation, lfx::AbstractGPs.LatentFiniteGP, ys
+)
+    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; la.newton_kwargs...)
     _, cache = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
-    f_post = LaplacePosteriorGP(lfx.fx, cache)
+    f_post = ApproxPosteriorGP(la, lfx.fx, cache)
+    # TODO: instead of lfx.fx, should we store lfx itself (including lik)?
     return f_post
 end
 
-struct LaplacePosteriorGP{Tprior,Tcache} <: AbstractGPs.AbstractGP
-    prior::Tprior  # TODO: this is lfx.fx; should we store lfx itself (including lik) instead?
-    cache::Tcache
-end
+const LaplacePosteriorGP = ApproxPosteriorGP{<:LaplaceApproximation}
 
 function _laplace_predict_intermediates(cache, prior_at_x, xnew)
     k_x_xnew = cov(prior_at_x.f, prior_at_x.x, xnew)
diff --git a/test/laplace.jl b/test/laplace.jl
index c7f3cd49..18b1ee4f 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -43,7 +43,7 @@ function optimize_elbo(
     )
 
     lf = build_latent_gp(training_results.minimizer)
-    f_post = laplace_posterior(lf(xs), ys; f_init=f)
+    f_post = posterior(LaplaceApproximation(; f_init=f), lf(xs), ys)
     return f_post, training_results
 end
 

From df881171c8204adf842568112389eb64067d0b14 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 15:56:55 +0300
Subject: [PATCH 36/60] clean up laplace_steps

---
 src/laplace.jl | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 32cceddb..5921ea6d 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -20,7 +20,7 @@ function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     b = W * f + d_ll
     a = b - Wsqrt * (B_ch \ (Wsqrt * K * b))
 
-    return (; W, Wsqrt, K, a, loglik=ll, d_loglik=d_ll, B_ch)
+    return (; K, f, W, Wsqrt, loglik=ll, d_loglik=d_ll, B_ch, a)
 end
 
 # dist_y_given_f(f) = Bernoulli(logistic(f))
@@ -234,29 +234,34 @@ function laplace_f_cov(cache)
     return Wsqrt_inv * (I - inv(B_ch)) * Wsqrt_inv
 end
 
-function LaplaceResult(f, fnew, cache)
-    # TODO should we use fnew?
+function LaplaceResult(fnew, cache)
+    f = cache.f
     f_cov = laplace_f_cov(cache)
     q = MvNormal(f, AbstractGPs._symmetric(f_cov))
     lml_approx = _laplace_lml(f, cache)
 
-    return (; f, f_cov, q, lml_approx, cache)
+    return (; fnew, f_cov, q, lml_approx, cache...)
 end
 
-function laplace_steps(dist_y_given_f, f_prior, ys; maxiter=100, f=mean(f_prior))
-    @assert mean(f_prior) == zero(mean(f_prior))  # might work with non-zero prior mean but not checked
-    @assert length(ys) == length(f_prior) == length(f)
+"""
+    laplace_steps(lfx::LatentFiniteGP, ys; newton_kwargs...)
 
-    K = cov(f_prior)
+For demonstration purposes: returns an array of all the intermediate
+approximations of each Newton step.
+
+If you are only interested in the actual posterior, use
+[`posterior(::LaplaceApproximation, ::LatentFiniteGP, ::AbstractArray)`](@ref).
+"""
+function laplace_steps(lfx::LatentFiniteGP, ys; newton_kwargs...)
+    dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)
 
     res_array = []
 
     function store_result!(fnew, cache)
-        push!(res_array, LaplaceResult(copy(f), fnew, cache))
-        return f .= fnew
+        push!(res_array, LaplaceResult(fnew, cache))
     end
 
-    _ = _newton_inner_loop(dist_y_given_f, ys, K; f_init=f, maxiter, callback=store_result!)
+    _ = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs..., callback=store_result!)
 
     return res_array
 end
@@ -272,7 +277,7 @@ function approx_lml(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
 end
 
 function AbstractGPs.posterior(
-    la::LaplaceApproximation, lfx::AbstractGPs.LatentFiniteGP, ys
+    la::LaplaceApproximation, lfx::LatentFiniteGP, ys
 )
     dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; la.newton_kwargs...)
     _, cache = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)

From 8d20a65142da7db5c2472b8967b0c5a3b27388fb Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:28:46 +0300
Subject: [PATCH 37/60] add laplace example

---
 examples/c-comparisons/Manifest.toml | 1256 ++++++++++++++++++++++++++
 examples/c-comparisons/Project.toml  |   11 +
 examples/c-comparisons/script.jl     |   80 ++
 3 files changed, 1347 insertions(+)
 create mode 100644 examples/c-comparisons/Manifest.toml
 create mode 100644 examples/c-comparisons/Project.toml
 create mode 100644 examples/c-comparisons/script.jl

diff --git a/examples/c-comparisons/Manifest.toml b/examples/c-comparisons/Manifest.toml
new file mode 100644
index 00000000..c596c51e
--- /dev/null
+++ b/examples/c-comparisons/Manifest.toml
@@ -0,0 +1,1256 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[AbstractFFTs]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
+uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+version = "1.0.1"
+
+[[AbstractGPs]]
+deps = ["ChainRulesCore", "Distributions", "FillArrays", "IrrationalConstants", "KernelFunctions", "LinearAlgebra", "Random", "RecipesBase", "Reexport", "Statistics", "StatsBase", "Test"]
+git-tree-sha1 = "80b6c9734d00ae7518e65a0e1063772a050d04a0"
+uuid = "99985d1d-32ba-4be9-9821-2ec096f28918"
+version = "0.5.1"
+
+[[Adapt]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
+uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+version = "3.3.1"
+
+[[ApproximateGPs]]
+deps = ["AbstractGPs", "ChainRulesCore", "Distributions", "FastGaussQuadrature", "FillArrays", "ForwardDiff", "GPLikelihoods", "KLDivergences", "LinearAlgebra", "QuadGK", "Reexport", "SpecialFunctions", "Statistics", "StatsBase"]
+path = "../.."
+uuid = "298c2ebc-0411-48ad-af38-99e88101b606"
+version = "0.1.1"
+
+[[ArgCheck]]
+git-tree-sha1 = "dedbbb2ddb876f899585c4ec4433265e3017215a"
+uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
+version = "2.1.0"
+
+[[ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+
+[[ArrayInterface]]
+deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
+git-tree-sha1 = "b8d49c34c3da35f220e7295659cd0bab8e739fed"
+uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+version = "3.1.33"
+
+[[Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[Bijectors]]
+deps = ["ArgCheck", "ChainRulesCore", "Compat", "Distributions", "Functors", "IrrationalConstants", "LinearAlgebra", "LogExpFunctions", "MappedArrays", "NonlinearSolve", "Random", "Reexport", "Requires", "SparseArrays", "Statistics"]
+git-tree-sha1 = "dca5e02c9426b2f8ce86d8e723d0702ff33df234"
+uuid = "76274a88-744f-5084-9051-94815aaf08c4"
+version = "0.9.8"
+
+[[BitTwiddlingConvenienceFunctions]]
+deps = ["Static"]
+git-tree-sha1 = "652aab0fc0d6d4db4cc726425cadf700e9f473f1"
+uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b"
+version = "0.1.0"
+
+[[Bzip2_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
+uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
+version = "1.0.8+0"
+
+[[CPUSummary]]
+deps = ["Hwloc", "IfElse", "Static"]
+git-tree-sha1 = "ed720e2622820bf584d4ad90e6fcb93d95170b44"
+uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
+version = "0.1.3"
+
+[[Cairo_jll]]
+deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"]
+git-tree-sha1 = "f2202b55d816427cd385a9a4f3ffb226bee80f99"
+uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a"
+version = "1.16.1+0"
+
+[[ChainRules]]
+deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "Statistics"]
+git-tree-sha1 = "d88340ab502af66cfffc821e70ae72f7dbdce645"
+uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+version = "1.11.5"
+
+[[ChainRulesCore]]
+deps = ["Compat", "LinearAlgebra", "SparseArrays"]
+git-tree-sha1 = "bd4afa1fdeec0c8b89dad3c6e92bc6e3b0fec9ce"
+uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+version = "1.6.0"
+
+[[CloseOpenIntervals]]
+deps = ["ArrayInterface", "Static"]
+git-tree-sha1 = "ce9c0d07ed6e1a4fecd2df6ace144cbd29ba6f37"
+uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9"
+version = "0.1.2"
+
+[[ColorSchemes]]
+deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random"]
+git-tree-sha1 = "9995eb3977fbf67b86d0a0a0508e83017ded03f2"
+uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
+version = "3.14.0"
+
+[[ColorTypes]]
+deps = ["FixedPointNumbers", "Random"]
+git-tree-sha1 = "024fe24d83e4a5bf5fc80501a314ce0d1aa35597"
+uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
+version = "0.11.0"
+
+[[Colors]]
+deps = ["ColorTypes", "FixedPointNumbers", "Reexport"]
+git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40"
+uuid = "5ae59095-9a9b-59fe-a467-6f913c188581"
+version = "0.12.8"
+
+[[CommonSolve]]
+git-tree-sha1 = "68a0743f578349ada8bc911a5cbd5a2ef6ed6d1f"
+uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
+version = "0.2.0"
+
+[[CommonSubexpressions]]
+deps = ["MacroTools", "Test"]
+git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7"
+uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
+version = "0.3.0"
+
+[[Compat]]
+deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
+git-tree-sha1 = "1a90210acd935f222ea19657f143004d2c2a1117"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "3.38.0"
+
+[[CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+
+[[CompositionsBase]]
+git-tree-sha1 = "455419f7e328a1a2493cabc6428d79e951349769"
+uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b"
+version = "0.1.1"
+
+[[ConstructionBase]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
+uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
+version = "1.3.0"
+
+[[Contour]]
+deps = ["StaticArrays"]
+git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7"
+uuid = "d38c429a-6771-53c6-b99e-75d170b6e991"
+version = "0.5.7"
+
+[[DataAPI]]
+git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8"
+uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
+version = "1.9.0"
+
+[[DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.10"
+
+[[DataValueInterfaces]]
+git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
+uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
+version = "1.0.0"
+
+[[Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[DelimitedFiles]]
+deps = ["Mmap"]
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+
+[[DiffResults]]
+deps = ["StaticArrays"]
+git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805"
+uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+version = "1.0.3"
+
+[[DiffRules]]
+deps = ["NaNMath", "Random", "SpecialFunctions"]
+git-tree-sha1 = "7220bc21c33e990c14f4a9a319b1d242ebc5b269"
+uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
+version = "1.3.1"
+
+[[Distances]]
+deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
+git-tree-sha1 = "9f46deb4d4ee4494ffb5a40a27a2aced67bdd838"
+uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+version = "0.10.4"
+
+[[Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[Distributions]]
+deps = ["ChainRulesCore", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
+git-tree-sha1 = "f4efaa4b5157e0cdb8283ae0b5428bc9208436ed"
+uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
+version = "0.25.16"
+
+[[DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
+uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+version = "0.8.5"
+
+[[Downloads]]
+deps = ["ArgTools", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+
+[[EarCut_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "3f3a2501fa7236e9b911e0f7a588c657e822bb6d"
+uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5"
+version = "2.2.3+0"
+
+[[Expat_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f"
+uuid = "2e619515-83b5-522b-bb60-26c02a35a201"
+version = "2.2.10+0"
+
+[[FFMPEG]]
+deps = ["FFMPEG_jll"]
+git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8"
+uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a"
+version = "0.4.1"
+
+[[FFMPEG_jll]]
+deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"]
+git-tree-sha1 = "d8a578692e3077ac998b50c0217dfd67f21d1e5f"
+uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5"
+version = "4.4.0+0"
+
+[[FastGaussQuadrature]]
+deps = ["LinearAlgebra", "SpecialFunctions", "StaticArrays"]
+git-tree-sha1 = "5829b25887e53fb6730a9df2ff89ed24baa6abf6"
+uuid = "442a2c76-b920-505d-bb47-c5924d526838"
+version = "0.4.7"
+
+[[FillArrays]]
+deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
+git-tree-sha1 = "7f6ad1a7f4621b4ab8e554133dade99ebc6e7221"
+uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
+version = "0.12.5"
+
+[[FiniteDiff]]
+deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
+git-tree-sha1 = "8b3c09b56acaf3c0e581c66638b85c8650ee9dca"
+uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
+version = "2.8.1"
+
+[[FixedPointNumbers]]
+deps = ["Statistics"]
+git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc"
+uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
+version = "0.8.4"
+
+[[Fontconfig_jll]]
+deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "21efd19106a55620a188615da6d3d06cd7f6ee03"
+uuid = "a3f928ae-7b40-5064-980b-68af3947d34b"
+version = "2.13.93+0"
+
+[[Formatting]]
+deps = ["Printf"]
+git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8"
+uuid = "59287772-0a20-5a39-b81b-1366585eb4c0"
+version = "0.4.2"
+
+[[ForwardDiff]]
+deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "NaNMath", "Printf", "Random", "SpecialFunctions", "StaticArrays"]
+git-tree-sha1 = "b5e930ac60b613ef3406da6d4f42c35d8dc51419"
+uuid = "f6369f11-7733-5829-9624-2563aa707210"
+version = "0.10.19"
+
+[[FreeType2_jll]]
+deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "87eb71354d8ec1a96d4a7636bd57a7347dde3ef9"
+uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7"
+version = "2.10.4+0"
+
+[[FriBidi_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91"
+uuid = "559328eb-81f9-559d-9380-de523a88c83c"
+version = "1.0.10+0"
+
+[[Functors]]
+git-tree-sha1 = "e2727f02325451f6b24445cd83bfa9aaac19cbe7"
+uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
+version = "0.2.5"
+
+[[Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+
+[[GLFW_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"]
+git-tree-sha1 = "dba1e8614e98949abfa60480b13653813d8f0157"
+uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89"
+version = "3.3.5+0"
+
+[[GPLikelihoods]]
+deps = ["Distributions", "Functors", "LinearAlgebra", "Random", "StatsFuns"]
+git-tree-sha1 = "e07acc5ca79ead40c8cf0338c9357fc7fb90eea1"
+uuid = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
+version = "0.2.0"
+
+[[GR]]
+deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"]
+git-tree-sha1 = "c2178cfbc0a5a552e16d097fae508f2024de61a3"
+uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
+version = "0.59.0"
+
+[[GR_jll]]
+deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"]
+git-tree-sha1 = "ef49a187604f865f4708c90e3f431890724e9012"
+uuid = "d2c73de3-f751-5644-a686-071e5b155ba9"
+version = "0.59.0+0"
+
+[[GeometryBasics]]
+deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"]
+git-tree-sha1 = "58bcdf5ebc057b085e58d95c138725628dd7453c"
+uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
+version = "0.4.1"
+
+[[Gettext_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"]
+git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046"
+uuid = "78b55507-aeef-58d4-861c-77aaff3498b1"
+version = "0.21.0+0"
+
+[[Glib_jll]]
+deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "7bf67e9a481712b3dbe9cb3dac852dc4b1162e02"
+uuid = "7746bdde-850d-59dc-9ae8-88ece973131d"
+version = "2.68.3+0"
+
+[[Graphite2_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011"
+uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472"
+version = "1.3.14+0"
+
+[[Grisu]]
+git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2"
+uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe"
+version = "1.0.2"
+
+[[HTTP]]
+deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"]
+git-tree-sha1 = "60ed5f1643927479f845b0135bb369b031b541fa"
+uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+version = "0.9.14"
+
+[[HarfBuzz_jll]]
+deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"]
+git-tree-sha1 = "8a954fed8ac097d5be04921d595f741115c1b2ad"
+uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566"
+version = "2.8.1+0"
+
+[[HostCPUFeatures]]
+deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"]
+git-tree-sha1 = "3169c8b31863f9a409be1d17693751314241e3eb"
+uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
+version = "0.1.4"
+
+[[Hwloc]]
+deps = ["Hwloc_jll"]
+git-tree-sha1 = "92d99146066c5c6888d5a3abc871e6a214388b91"
+uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
+version = "2.0.0"
+
+[[Hwloc_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "3395d4d4aeb3c9d31f5929d32760d8baeee88aaf"
+uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8"
+version = "2.5.0+0"
+
+[[IOCapture]]
+deps = ["Logging", "Random"]
+git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a"
+uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
+version = "0.2.2"
+
+[[IRTools]]
+deps = ["InteractiveUtils", "MacroTools", "Test"]
+git-tree-sha1 = "95215cd0076a150ef46ff7928892bc341864c73c"
+uuid = "7869d1d1-7146-5819-86e3-90919afe41df"
+version = "0.4.3"
+
+[[IfElse]]
+git-tree-sha1 = "28e837ff3e7a6c3cdb252ce49fb412c8eb3caeef"
+uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
+version = "0.1.0"
+
+[[IniFile]]
+deps = ["Test"]
+git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
+uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
+version = "0.5.0"
+
+[[InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[IrrationalConstants]]
+git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
+uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
+version = "0.1.0"
+
+[[IterTools]]
+git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18"
+uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
+version = "1.3.0"
+
+[[IterativeSolvers]]
+deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"]
+git-tree-sha1 = "1a8c6237e78b714e901e406c096fc8a65528af7d"
+uuid = "42fd0dbc-a981-5370-80f2-aaf504508153"
+version = "0.9.1"
+
+[[IteratorInterfaceExtensions]]
+git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
+uuid = "82899510-4779-5014-852e-03e436cf321d"
+version = "1.0.0"
+
+[[JLLWrappers]]
+deps = ["Preferences"]
+git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.3.0"
+
+[[JSON]]
+deps = ["Dates", "Mmap", "Parsers", "Unicode"]
+git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "0.21.2"
+
+[[JpegTurbo_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943"
+uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8"
+version = "2.1.0+0"
+
+[[KLDivergences]]
+deps = ["ChainRulesCore", "Distances", "Distributions", "LinearAlgebra", "PDMats", "SpecialFunctions", "StatsBase"]
+git-tree-sha1 = "b4663db8fb56053b1d1a2af80533b645eba9583a"
+uuid = "3c9cd921-3d3f-41e2-830c-e020174918cc"
+version = "0.2.1"
+
+[[KernelFunctions]]
+deps = ["ChainRulesCore", "Compat", "CompositionsBase", "Distances", "FillArrays", "Functors", "IrrationalConstants", "LinearAlgebra", "LogExpFunctions", "Random", "Requires", "SpecialFunctions", "StatsBase", "TensorCore", "Test", "ZygoteRules"]
+git-tree-sha1 = "3b7fceeab37b650c280eb072ffe2b868b03a5423"
+uuid = "ec8451be-7e33-11e9-00cf-bbf324bd1392"
+version = "0.10.17"
+
+[[LAME_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c"
+uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d"
+version = "3.100.1+0"
+
+[[LZO_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6"
+uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac"
+version = "2.10.1+0"
+
+[[LaTeXStrings]]
+git-tree-sha1 = "c7f1c695e06c01b95a67f0cd1d34994f3e7db104"
+uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+version = "1.2.1"
+
+[[Latexify]]
+deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"]
+git-tree-sha1 = "a4b12a1bd2ebade87891ab7e36fdbce582301a92"
+uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316"
+version = "0.15.6"
+
+[[LayoutPointers]]
+deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"]
+git-tree-sha1 = "d2bda6aa0b03ce6f141a2dc73d0bcb7070131adc"
+uuid = "10f19ff3-798f-405d-979b-55457f8fc047"
+version = "0.1.3"
+
+[[LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+
+[[LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+
+[[LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[Libffi_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "761a393aeccd6aa92ec3515e428c26bf99575b3b"
+uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490"
+version = "3.2.2+0"
+
+[[Libgcrypt_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"]
+git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae"
+uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4"
+version = "1.8.7+0"
+
+[[Libglvnd_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"]
+git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf"
+uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29"
+version = "1.3.0+3"
+
+[[Libgpg_error_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9"
+uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8"
+version = "1.42.0+0"
+
+[[Libiconv_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
+uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
+version = "1.16.1+1"
+
+[[Libmount_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73"
+uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9"
+version = "2.35.0+0"
+
+[[Libtiff_jll]]
+deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"]
+git-tree-sha1 = "340e257aada13f95f98ee352d316c3bed37c8ab9"
+uuid = "89763e89-9b03-5906-acba-b20f662cd828"
+version = "4.3.0+0"
+
+[[Libuuid_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066"
+uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700"
+version = "2.36.0+0"
+
+[[LineSearches]]
+deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"]
+git-tree-sha1 = "f27132e551e959b3667d8c93eae90973225032dd"
+uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
+version = "7.1.1"
+
+[[LinearAlgebra]]
+deps = ["Libdl"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[Literate]]
+deps = ["Base64", "IOCapture", "JSON", "REPL"]
+git-tree-sha1 = "bbebc3c14dbfbe76bfcbabf0937481ac84dc86ef"
+uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+version = "2.9.3"
+
+[[LogExpFunctions]]
+deps = ["ChainRulesCore", "DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
+git-tree-sha1 = "34dc30f868e368f8a17b728a1238f3fcda43931a"
+uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+version = "0.3.3"
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[LoopVectorization]]
+deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "Requires", "SLEEFPirates", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"]
+git-tree-sha1 = "4804192466f4d370ca19c9957dfb3d919e6ef77e"
+uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
+version = "0.12.77"
+
+[[MacroTools]]
+deps = ["Markdown", "Random"]
+git-tree-sha1 = "5a5bc6bf062f0f95e62d0fe0a2d99699fed82dd9"
+uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+version = "0.5.8"
+
+[[ManualMemory]]
+git-tree-sha1 = "9cb207b18148b2199db259adfa923b45593fe08e"
+uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667"
+version = "0.1.6"
+
+[[MappedArrays]]
+git-tree-sha1 = "e8b359ef06ec72e8c030463fe02efe5527ee5142"
+uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900"
+version = "0.4.1"
+
+[[Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[MbedTLS]]
+deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"]
+git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe"
+uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
+version = "1.0.3"
+
+[[MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+
+[[Measures]]
+git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f"
+uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
+version = "0.3.1"
+
+[[Missings]]
+deps = ["DataAPI"]
+git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f"
+uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+version = "1.0.2"
+
+[[Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+
+[[NLSolversBase]]
+deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"]
+git-tree-sha1 = "144bab5b1443545bc4e791536c9f1eacb4eed06a"
+uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
+version = "7.8.1"
+
+[[NaNMath]]
+git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb"
+uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
+version = "0.3.5"
+
+[[NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+
+[[NonlinearSolve]]
+deps = ["ArrayInterface", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"]
+git-tree-sha1 = "e9ffc92217b8709e0cf7b8808f6223a4a0936c95"
+uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
+version = "0.3.11"
+
+[[OffsetArrays]]
+deps = ["Adapt"]
+git-tree-sha1 = "c0e9e582987d36d5a61e650e6e543b9e44d9914b"
+uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
+version = "1.10.7"
+
+[[Ogg_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "7937eda4681660b4d6aeeecc2f7e1c81c8ee4e2f"
+uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051"
+version = "1.3.5+0"
+
+[[OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+
+[[OpenSSL_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a"
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "1.1.10+0"
+
+[[OpenSpecFun_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
+uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
+version = "0.5.5+0"
+
+[[Optim]]
+deps = ["Compat", "FillArrays", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"]
+git-tree-sha1 = "7863df65dbb2a0fa8f85fcaf0a41167640d2ebed"
+uuid = "429524aa-4258-5aef-a3af-852621145aeb"
+version = "1.4.1"
+
+[[Opus_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "51a08fb14ec28da2ec7a927c4337e4332c2a4720"
+uuid = "91d4177d-7536-5919-b921-800302f37372"
+version = "1.3.2+0"
+
+[[OrderedCollections]]
+git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.4.1"
+
+[[PCRE_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488"
+uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc"
+version = "8.44.0+0"
+
+[[PDMats]]
+deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
+git-tree-sha1 = "4dd403333bcf0909341cfe57ec115152f937d7d8"
+uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
+version = "0.11.1"
+
+[[ParameterHandling]]
+deps = ["Bijectors", "ChainRulesCore", "Compat", "IterTools", "LinearAlgebra", "SparseArrays", "Test"]
+git-tree-sha1 = "b454231b4559c118fe3522734c71555592445843"
+uuid = "2412ca09-6db7-441c-8e3a-88d5709968c5"
+version = "0.3.8"
+
+[[Parameters]]
+deps = ["OrderedCollections", "UnPack"]
+git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
+uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
+version = "0.12.3"
+
+[[Parsers]]
+deps = ["Dates"]
+git-tree-sha1 = "9d8c00ef7a8d110787ff6f170579846f776133a9"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.0.4"
+
+[[Pixman_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "b4f5d02549a10e20780a24fce72bea96b6329e29"
+uuid = "30392449-352a-5448-841d-b1acce4e97dc"
+version = "0.40.1+0"
+
+[[Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[[PlotThemes]]
+deps = ["PlotUtils", "Requires", "Statistics"]
+git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d"
+uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a"
+version = "2.0.1"
+
+[[PlotUtils]]
+deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"]
+git-tree-sha1 = "2537ed3c0ed5e03896927187f5f2ee6a4ab342db"
+uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043"
+version = "1.0.14"
+
+[[Plots]]
+deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
+git-tree-sha1 = "457b13497a3ea4deb33d273a6a5ea15c25c0ebd9"
+uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+version = "1.22.2"
+
+[[Polyester]]
+deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"]
+git-tree-sha1 = "74d358e649e0450cb5d3ff54ca7c8d806ed62765"
+uuid = "f517fe37-dbe3-4b94-8317-1923a5111588"
+version = "0.5.1"
+
+[[PolyesterWeave]]
+deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"]
+git-tree-sha1 = "371a19bb801c1b420b29141750f3a34d6c6634b9"
+uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
+version = "0.1.0"
+
+[[PositiveFactorizations]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20"
+uuid = "85a6dd25-e78a-55b7-8502-1745935b8125"
+version = "0.2.4"
+
+[[Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.2.2"
+
+[[Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[Qt5Base_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"]
+git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8"
+uuid = "ea2cea3b-5b76-57ae-a6ef-0a8af62496e1"
+version = "5.15.3+0"
+
+[[QuadGK]]
+deps = ["DataStructures", "LinearAlgebra"]
+git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
+uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
+version = "2.4.2"
+
+[[REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[RecipesBase]]
+git-tree-sha1 = "44a75aa7a527910ee3d1751d1f0e4148698add9e"
+uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+version = "1.1.2"
+
+[[RecipesPipeline]]
+deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"]
+git-tree-sha1 = "7ad0dfa8d03b7bcf8c597f59f5292801730c55b8"
+uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c"
+version = "0.4.1"
+
+[[RecursiveArrayTools]]
+deps = ["ArrayInterface", "ChainRulesCore", "DocStringExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"]
+git-tree-sha1 = "00bede2eb099dcc1ddc3f9ec02180c326b420ee2"
+uuid = "731186ca-8d62-57ce-b412-fbd966d074cd"
+version = "2.17.2"
+
+[[RecursiveFactorization]]
+deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "StrideArraysCore", "TriangularSolve"]
+git-tree-sha1 = "575c18c6b00ce409f75d96fefe33ebe01575457a"
+uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
+version = "0.2.4"
+
+[[Reexport]]
+git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "1.2.2"
+
+[[Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.1.3"
+
+[[Rmath]]
+deps = ["Random", "Rmath_jll"]
+git-tree-sha1 = "bf3188feca147ce108c76ad82c2792c57abe7b1f"
+uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
+version = "0.7.0"
+
+[[Rmath_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7"
+uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f"
+version = "0.3.0+0"
+
+[[SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+
+[[SIMDTypes]]
+git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c"
+uuid = "94e857df-77ce-4151-89e5-788b33177be4"
+version = "0.1.0"
+
+[[SLEEFPirates]]
+deps = ["IfElse", "Static", "VectorizationBase"]
+git-tree-sha1 = "2e8150c7d2a14ac68537c7aac25faa6577aff046"
+uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
+version = "0.6.27"
+
+[[SciMLBase]]
+deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "RecipesBase", "RecursiveArrayTools", "StaticArrays", "Statistics", "Tables", "TreeViews"]
+git-tree-sha1 = "91e29a2bb257a4b992c48f35084064578b87d364"
+uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
+version = "1.19.0"
+
+[[Scratch]]
+deps = ["Dates"]
+git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda"
+uuid = "6c6a2e73-6563-6170-7368-637461726353"
+version = "1.1.0"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[Setfield]]
+deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
+git-tree-sha1 = "def0718ddbabeb5476e51e5a43609bee889f285d"
+uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
+version = "0.8.0"
+
+[[SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+
+[[Showoff]]
+deps = ["Dates", "Grisu"]
+git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de"
+uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f"
+version = "1.0.3"
+
+[[Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[SortingAlgorithms]]
+deps = ["DataStructures"]
+git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508"
+uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
+version = "1.0.1"
+
+[[SparseArrays]]
+deps = ["LinearAlgebra", "Random"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[SpecialFunctions]]
+deps = ["ChainRulesCore", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
+git-tree-sha1 = "ad42c30a6204c74d264692e633133dcea0e8b14e"
+uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
+version = "1.6.2"
+
+[[Static]]
+deps = ["IfElse"]
+git-tree-sha1 = "a8f30abc7c64a39d389680b74e749cf33f872a70"
+uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
+version = "0.3.3"
+
+[[StaticArrays]]
+deps = ["LinearAlgebra", "Random", "Statistics"]
+git-tree-sha1 = "3240808c6d463ac46f1c1cd7638375cd22abbccb"
+uuid = "90137ffa-7385-5640-81b9-e52037218182"
+version = "1.2.12"
+
+[[Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[[StatsAPI]]
+git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510"
+uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
+version = "1.0.0"
+
+[[StatsBase]]
+deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
+git-tree-sha1 = "8cbbc098554648c84f79a463c9ff0fd277144b6c"
+uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+version = "0.33.10"
+
+[[StatsFuns]]
+deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"]
+git-tree-sha1 = "46d7ccc7104860c38b11966dd1f72ff042f382e4"
+uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+version = "0.9.10"
+
+[[StrideArraysCore]]
+deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"]
+git-tree-sha1 = "1258e25e171aec339866f283a11e7d75867e77d7"
+uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da"
+version = "0.2.4"
+
+[[StructArrays]]
+deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
+git-tree-sha1 = "2ce41e0d042c60ecd131e9fb7154a3bfadbf50d3"
+uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+version = "0.6.3"
+
+[[SuiteSparse]]
+deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
+uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
+
+[[TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+
+[[TableTraits]]
+deps = ["IteratorInterfaceExtensions"]
+git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
+uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+version = "1.0.1"
+
+[[Tables]]
+deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
+git-tree-sha1 = "1162ce4a6c4b7e31e0e6b14486a6986951c73be9"
+uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+version = "1.5.2"
+
+[[Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+
+[[TensorCore]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "1feb45f88d133a655e001435632f019a9a1bcdb6"
+uuid = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50"
+version = "0.1.1"
+
+[[Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[ThreadingUtilities]]
+deps = ["ManualMemory"]
+git-tree-sha1 = "03013c6ae7f1824131b2ae2fc1d49793b51e8394"
+uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5"
+version = "0.4.6"
+
+[[TreeViews]]
+deps = ["Test"]
+git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6"
+uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7"
+version = "0.3.0"
+
+[[TriangularSolve]]
+deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"]
+git-tree-sha1 = "ed55426a514db35f58d36c3812aae89cfc057401"
+uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
+version = "0.1.6"
+
+[[URIs]]
+git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355"
+uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
+version = "1.3.0"
+
+[[UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[UnPack]]
+git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
+uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+version = "1.0.2"
+
+[[Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[VectorizationBase]]
+deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "Hwloc", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"]
+git-tree-sha1 = "3e2385f4ec895e694c24a1d5aba58cb6d27cf8b6"
+uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
+version = "0.21.10"
+
+[[Wayland_jll]]
+deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
+git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23"
+uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89"
+version = "1.19.0+0"
+
+[[Wayland_protocols_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll"]
+git-tree-sha1 = "2839f1c1296940218e35df0bbb220f2a79686670"
+uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91"
+version = "1.18.0+4"
+
+[[XML2_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
+uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
+version = "2.9.12+0"
+
+[[XSLT_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"]
+git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a"
+uuid = "aed1982a-8fda-507f-9586-7b0439959a61"
+version = "1.1.34+0"
+
+[[Xorg_libX11_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"]
+git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527"
+uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc"
+version = "1.6.9+4"
+
+[[Xorg_libXau_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e"
+uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec"
+version = "1.0.9+4"
+
+[[Xorg_libXcursor_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"]
+git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd"
+uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724"
+version = "1.2.0+4"
+
+[[Xorg_libXdmcp_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4"
+uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05"
+version = "1.1.3+4"
+
+[[Xorg_libXext_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
+git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3"
+uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3"
+version = "1.3.4+4"
+
+[[Xorg_libXfixes_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
+git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4"
+uuid = "d091e8ba-531a-589c-9de9-94069b037ed8"
+version = "5.0.3+4"
+
+[[Xorg_libXi_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"]
+git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246"
+uuid = "a51aa0fd-4e3c-5386-b890-e753decda492"
+version = "1.7.10+4"
+
+[[Xorg_libXinerama_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"]
+git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123"
+uuid = "d1454406-59df-5ea1-beac-c340f2130bc3"
+version = "1.1.4+4"
+
+[[Xorg_libXrandr_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"]
+git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631"
+uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484"
+version = "1.5.2+4"
+
+[[Xorg_libXrender_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
+git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96"
+uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa"
+version = "0.9.10+4"
+
+[[Xorg_libpthread_stubs_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb"
+uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74"
+version = "0.1.0+3"
+
+[[Xorg_libxcb_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"]
+git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6"
+uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b"
+version = "1.13.0+3"
+
+[[Xorg_libxkbfile_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
+git-tree-sha1 = "926af861744212db0eb001d9e40b5d16292080b2"
+uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a"
+version = "1.1.0+4"
+
+[[Xorg_xcb_util_image_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"]
+git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97"
+uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b"
+version = "0.4.0+1"
+
+[[Xorg_xcb_util_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"]
+git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1"
+uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5"
+version = "0.4.0+1"
+
+[[Xorg_xcb_util_keysyms_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"]
+git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00"
+uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7"
+version = "0.4.0+1"
+
+[[Xorg_xcb_util_renderutil_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"]
+git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e"
+uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e"
+version = "0.3.9+1"
+
+[[Xorg_xcb_util_wm_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"]
+git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67"
+uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361"
+version = "0.4.1+1"
+
+[[Xorg_xkbcomp_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxkbfile_jll"]
+git-tree-sha1 = "4bcbf660f6c2e714f87e960a171b119d06ee163b"
+uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4"
+version = "1.4.2+4"
+
+[[Xorg_xkeyboard_config_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xkbcomp_jll"]
+git-tree-sha1 = "5c8424f8a67c3f2209646d4425f3d415fee5931d"
+uuid = "33bec58e-1273-512f-9401-5d533626f822"
+version = "2.27.0+4"
+
+[[Xorg_xtrans_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845"
+uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10"
+version = "1.4.0+3"
+
+[[Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+
+[[Zstd_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6"
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.0+0"
+
+[[Zygote]]
+deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
+git-tree-sha1 = "4b799addc63aa77ad4112cede8086564d9068511"
+uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
+version = "0.6.22"
+
+[[ZygoteRules]]
+deps = ["MacroTools"]
+git-tree-sha1 = "9e7a1e8ca60b742e508a315c17eef5211e7fbfd7"
+uuid = "700de1a5-db45-46bc-99cf-38207098b444"
+version = "0.2.1"
+
+[[libass_jll]]
+deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "5982a94fcba20f02f42ace44b9894ee2b140fe47"
+uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0"
+version = "0.15.1+0"
+
+[[libfdk_aac_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "daacc84a041563f965be61859a36e17c4e4fcd55"
+uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280"
+version = "2.0.2+0"
+
+[[libpng_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
+git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c"
+uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f"
+version = "1.6.38+0"
+
+[[libvorbis_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"]
+git-tree-sha1 = "c45f4e40e7aafe9d086379e5578947ec8b95a8fb"
+uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a"
+version = "1.3.7+0"
+
+[[nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+
+[[p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+
+[[x264_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2"
+uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a"
+version = "2021.5.5+0"
+
+[[x265_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9"
+uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76"
+version = "3.5.0+0"
+
+[[xkbcommon_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"]
+git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6"
+uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd"
+version = "0.9.1+5"
diff --git a/examples/c-comparisons/Project.toml b/examples/c-comparisons/Project.toml
new file mode 100644
index 00000000..e2e658da
--- /dev/null
+++ b/examples/c-comparisons/Project.toml
@@ -0,0 +1,11 @@
+[deps]
+ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
new file mode 100644
index 00000000..91ebc027
--- /dev/null
+++ b/examples/c-comparisons/script.jl
@@ -0,0 +1,80 @@
+# # Binary Classification with Various Approximations
+# 
+#
+# This example demonstrates how to carry out non-conjugate Gaussian process
+# inference using different approximations.
+# For a basic introduction to the functionality of this library, please refer to the
+# [User Guide](@ref).
+#
+# ## Setup
+
+using ApproximateGPs
+using ParameterHandling
+using Zygote
+using Distributions
+using LogExpFunctions
+using LinearAlgebra
+using Optim
+
+using Plots
+default(; legend=:outertopright, size=(700, 400))
+
+using Random
+Random.seed!(1);
+
+# ## Generate some training data
+
+Xgrid = -5:0.1:29
+X = range(0, 23.5; length=48)
+fs = @. 3 * sin(10 + 0.6X) + sin(0.1X) - 1
+# invlink = normcdf
+invlink = logistic
+ps = invlink.(fs)
+Y = [rand(Bernoulli(p)) for p in ps]
+
+function plot_data()
+    plot()
+    plot!(X, ps)
+    return scatter!(X, Y)
+end
+
+plot_data()
+
+# ## Creating the latent GP
+
+dist_y_given_f(f) = Bernoulli(invlink(f))
+
+function build_latent_gp(theta)
+    variance = softplus(theta[1])
+    lengthscale = softplus(theta[2])
+    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
+    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
+end
+
+function plot_samples!(Xgrid, fpost; samples=100, color=2)
+    fsamples = rand(fpost(Xgrid, 1e-8), samples)
+    return plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
+end
+
+# Initialise the parameters
+
+theta0 = [0.0, 1.0]
+
+lf = build_latent_gp(theta0)
+
+f_post = posterior(LaplaceApproximation(), lf(X), Y)
+
+plot_samples!(Xgrid, f_post)
+
+# Optimise the parameters
+
+objective = build_laplace_objective(build_latent_gp, X, Y; newton_warmstart=true)
+
+training_results = Optim.optimize(
+    objective, θ -> only(Zygote.gradient(objective, θ)), theta0, LBFGS(); inplace=false
+)
+
+lf2 = build_latent_gp(training_results.minimizer)
+f_post2 = posterior(LaplaceApproximation(), lf(X), Y)
+
+plot_samples!(Xgrid, f_post2)

From 30ba66328e3c28fc65f9112f5c575d878b34dbcc Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:29:09 +0300
Subject: [PATCH 38/60] cleanup

---
 test/laplace.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index 18b1ee4f..deaa66de 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -32,10 +32,11 @@ function optimize_elbo(
     objective = build_laplace_objective!(
         f, build_latent_gp, xs, ys; newton_warmstart, newton_callback
     )
+    objective_grad(θ) = only(Zygote.gradient(objective, θ))
 
     training_results = Optim.optimize(
         objective,
-        θ -> only(Zygote.gradient(objective, θ)),
+        objective_grad,
         theta0,
         optimizer,
         optim_options;

From 82094427bfaeeb859b57bb756d857fdd255403ba Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:29:25 +0300
Subject: [PATCH 39/60] cleanup2

---
 test/laplace.jl | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index deaa66de..69a06494 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -35,12 +35,7 @@ function optimize_elbo(
     objective_grad(θ) = only(Zygote.gradient(objective, θ))
 
     training_results = Optim.optimize(
-        objective,
-        objective_grad,
-        theta0,
-        optimizer,
-        optim_options;
-        inplace=false,
+        objective, objective_grad, theta0, optimizer, optim_options; inplace=false
     )
 
     lf = build_latent_gp(training_results.minimizer)

From def61e77ae04486dc40fa3e7532d71bfec19f84e Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:30:15 +0300
Subject: [PATCH 40/60] format

---
 src/laplace.jl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 5921ea6d..3cea314c 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -258,7 +258,7 @@ function laplace_steps(lfx::LatentFiniteGP, ys; newton_kwargs...)
     res_array = []
 
     function store_result!(fnew, cache)
-        push!(res_array, LaplaceResult(fnew, cache))
+        return push!(res_array, LaplaceResult(fnew, cache))
     end
 
     _ = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs..., callback=store_result!)
@@ -276,9 +276,7 @@ function approx_lml(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
     return laplace_lml(lfx, ys; la.newton_kwargs...)
 end
 
-function AbstractGPs.posterior(
-    la::LaplaceApproximation, lfx::LatentFiniteGP, ys
-)
+function AbstractGPs.posterior(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
     dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; la.newton_kwargs...)
     _, cache = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
     f_post = ApproxPosteriorGP(la, lfx.fx, cache)

From 4566c178c3d2a8215dbe7c3e28a37ab40d6f0c5c Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:30:41 +0300
Subject: [PATCH 41/60] remove demo script

---
 demo_laplace.jl | 84 -------------------------------------------------
 1 file changed, 84 deletions(-)
 delete mode 100644 demo_laplace.jl

diff --git a/demo_laplace.jl b/demo_laplace.jl
deleted file mode 100644
index 82ad1d58..00000000
--- a/demo_laplace.jl
+++ /dev/null
@@ -1,84 +0,0 @@
-using Random
-using LogExpFunctions
-using Distributions
-using Plots
-using Optim
-using AbstractGPs
-using AbstractGPs: LatentFiniteGP
-using ApproximateGPs
-
-Random.seed!(1)
-X = range(0, 23.5; length=48)
-fs = @. 3 * sin(10 + 0.6X) + sin(0.1X) - 1
-# invlink = normcdf
-invlink = logistic
-ps = invlink.(fs)
-Y = [rand(Bernoulli(p)) for p in ps]
-
-function plot_data()
-    plot()
-    plot!(X, ps)
-    return scatter!(X, Y)
-end
-
-dist_y_given_f(f) = Bernoulli(invlink(f))
-
-function build_latent_gp(theta)
-    variance = softplus(theta[1])
-    lengthscale = softplus(theta[2])
-    kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
-    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
-end
-
-function plot_samples!(Xgrid, fpost; samples=100, color=2)
-    fsamples = rand(fpost(Xgrid, 1e-8), samples)
-    return plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
-end
-
-using Zygote
-
-theta0 = [0.0, 1.0]
-
-optimizer = LBFGS(;
-    alphaguess=Optim.LineSearches.InitialStatic(; scaled=true),
-    linesearch=Optim.LineSearches.BackTracking(),
-)
-optim_options = Optim.Options(; iterations=100)
-
-lf = build_latent_gp(theta0)
-lfX = lf(X)
-
-newt_res = laplace_steps(lfX.lik, lfX.fx, Y)
-
-f_post, opt_res = ApproximateGPs.optimize_elbo(
-    build_latent_gp, theta0, X, Y, NelderMead(), optim_options
-)
-
-theta1 = opt_res.minimizer
-
-function full_objective(theta)
-    Zygote.ignore() do
-        # Zygote does not like the try/catch within @info
-        @info "Hyperparameters: $theta"
-    end
-    lf = build_latent_gp(theta)
-    f = zeros(length(X))
-    lml = ApproximateGPs.laplace_lml!(f, lf(X), Y)
-    return -lml
-end
-
-function nonewton_objective(theta)
-    _lf = build_latent_gp(theta)
-    return -ApproximateGPs.laplace_lml_nonewton(newt_res[end].f, _lf(X), Y)
-end
-
-using FiniteDifferences
-
-FiniteDifferences.grad(central_fdm(5, 1), full_objective, theta0)
-
-function comp_lml(theta)
-    _lf = build_latent_gp(theta)
-    K = kernelmatrix(_lf.f.kernel, X)
-    dist_y_given_f = _lf.lik
-    return ApproximateGPs.laplace_lml(K, dist_y_given_f, Y; maxiter=100)
-end

From 4095df1c6bc6c34a1bfb524b53c84efc06d4a3f1 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:52:50 +0300
Subject: [PATCH 42/60] bugfix

---
 src/laplace.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 3cea314c..7acf583f 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -295,19 +295,19 @@ function _laplace_predict_intermediates(cache, prior_at_x, xnew)
 end
 
 function StatsBase.mean_and_var(f::LaplacePosteriorGP, x::AbstractVector)
-    f_mean, v = _laplace_predict_intermediates(f.cache, f.prior, x)
+    f_mean, v = _laplace_predict_intermediates(f.data, f.prior, x)
     f_var = var(f.prior.f, x) - vec(sum(v .^ 2; dims=1))
     return f_mean, f_var
 end
 
 function StatsBase.mean_and_cov(f::LaplacePosteriorGP, x::AbstractVector)
-    f_mean, v = _laplace_predict_intermediates(f.cache, f.prior, x)
+    f_mean, v = _laplace_predict_intermediates(f.data, f.prior, x)
     f_cov = cov(f.prior.f, x) - v' * v
     return f_mean, f_cov
 end
 
 function Statistics.mean(f::LaplacePosteriorGP, x::AbstractVector)
-    d_loglik = f.cache.d_loglik
+    d_loglik = f.data.d_loglik
     return mean(f.prior.f, x) + cov(f.prior.f, f.prior.x, x)' * d_loglik
 end
 

From 91d0b34257691e15fd7903e844de2e7f07e648af Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 16:56:48 +0300
Subject: [PATCH 43/60] update notebook

---
 examples/c-comparisons/script.jl | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 91ebc027..33013198 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -56,17 +56,22 @@ function plot_samples!(Xgrid, fpost; samples=100, color=2)
     return plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
 end
 
-# Initialise the parameters
+# Initialise the hyperparameters
 
 theta0 = [0.0, 1.0]
 
 lf = build_latent_gp(theta0)
 
+lf.f.kernel
+
+# Plot samples from approximate posterior
+
 f_post = posterior(LaplaceApproximation(), lf(X), Y)
 
+plot_data()
 plot_samples!(Xgrid, f_post)
 
-# Optimise the parameters
+# ## Optimise the parameters
 
 objective = build_laplace_objective(build_latent_gp, X, Y; newton_warmstart=true)
 
@@ -75,6 +80,12 @@ training_results = Optim.optimize(
 )
 
 lf2 = build_latent_gp(training_results.minimizer)
+
+lf2.f.kernel
+
+# Plot samples from approximate posterior for optimised hyperparameters
+
 f_post2 = posterior(LaplaceApproximation(), lf(X), Y)
 
+plot_data()
 plot_samples!(Xgrid, f_post2)

From bdc3618d0659c4f166cb010b2e882aac6005ba17 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 17:17:31 +0300
Subject: [PATCH 44/60] bugfix 2

---
 src/laplace.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index 7acf583f..d33b16e2 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -278,7 +278,8 @@ end
 
 function AbstractGPs.posterior(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
     dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; la.newton_kwargs...)
-    _, cache = newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
+    _, cache = _newton_inner_loop(dist_y_given_f, ys, K; newton_kwargs...)
+    # TODO: should we run newton_inner_loop() and _laplace_train_intermediates() explicitly?
     f_post = ApproxPosteriorGP(la, lfx.fx, cache)
     # TODO: instead of lfx.fx, should we store lfx itself (including lik)?
     return f_post

From 12655f8f30678f70cfbc9fb7677cd8558ff1036f Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 17:25:23 +0300
Subject: [PATCH 45/60] bugfiiiix

---
 examples/c-comparisons/script.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 33013198..ead02dab 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -85,7 +85,7 @@ lf2.f.kernel
 
 # Plot samples from approximate posterior for optimised hyperparameters
 
-f_post2 = posterior(LaplaceApproximation(), lf(X), Y)
+f_post2 = posterior(LaplaceApproximation(), lf2(X), Y)
 
 plot_data()
 plot_samples!(Xgrid, f_post2)

From b1c0f80340ad3c84ef46a9f8da8497021f1f6873 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 17:26:02 +0300
Subject: [PATCH 46/60] also plot mean

---
 examples/c-comparisons/script.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index ead02dab..42b7c6ca 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -52,8 +52,10 @@ function build_latent_gp(theta)
 end
 
 function plot_samples!(Xgrid, fpost; samples=100, color=2)
-    fsamples = rand(fpost(Xgrid, 1e-8), samples)
-    return plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
+    fx = fpost(Xgrid, 1e-8)
+    fsamples = rand(fx, samples)
+    plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
+    return plot!(Xgrid, invlink.(mean(fx)); color, alpha=1, lw=2, label="")
 end
 
 # Initialise the hyperparameters

From 3c97c97c40d87b5441f036eaa38a36d4a28de683 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 17:31:03 +0300
Subject: [PATCH 47/60] improve plotting

---
 examples/c-comparisons/script.jl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 42b7c6ca..c7256bb0 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -24,18 +24,19 @@ Random.seed!(1);
 
 # ## Generate some training data
 
-Xgrid = -5:0.1:29
+Xgrid = -4:0.1:29
 X = range(0, 23.5; length=48)
-fs = @. 3 * sin(10 + 0.6X) + sin(0.1X) - 1
+f(x) = 3 * sin(10 + 0.6x) + sin(0.1x) - 1
+fs = f.(X)
 # invlink = normcdf
 invlink = logistic
 ps = invlink.(fs)
 Y = [rand(Bernoulli(p)) for p in ps]
 
 function plot_data()
-    plot()
-    plot!(X, ps)
-    return scatter!(X, Y)
+    plot(xlims=extrema(Xgrid), xticks=0:6:24)
+    plot!(Xgrid, invlink ∘ f, label="true probabilities")
+    return scatter!(X, Y, label="observations", color=3)
 end
 
 plot_data()
@@ -54,13 +55,13 @@ end
 function plot_samples!(Xgrid, fpost; samples=100, color=2)
     fx = fpost(Xgrid, 1e-8)
     fsamples = rand(fx, samples)
-    plot!(Xgrid, invlink.(fsamples); color, alpha=0.3, label="")
-    return plot!(Xgrid, invlink.(mean(fx)); color, alpha=1, lw=2, label="")
+    plot!(Xgrid, invlink.(fsamples); color, alpha=0.2, label="")
+    return plot!(Xgrid, invlink.(mean(fx)); color, lw=2, label="posterior fit")
 end
 
 # Initialise the hyperparameters
 
-theta0 = [0.0, 1.0]
+theta0 = [0.0, 3.0]
 
 lf = build_latent_gp(theta0)
 
@@ -70,7 +71,7 @@ lf.f.kernel
 
 f_post = posterior(LaplaceApproximation(), lf(X), Y)
 
-plot_data()
+p1 = plot_data()
 plot_samples!(Xgrid, f_post)
 
 # ## Optimise the parameters
@@ -89,5 +90,5 @@ lf2.f.kernel
 
 f_post2 = posterior(LaplaceApproximation(), lf2(X), Y)
 
-plot_data()
+p2 = plot_data()
 plot_samples!(Xgrid, f_post2)

From 81ceb93635159325499231dff04fb6bbf25da272 Mon Sep 17 00:00:00 2001
From: st-- <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 17:51:00 +0300
Subject: [PATCH 48/60] Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 examples/c-comparisons/script.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index c7256bb0..7c333642 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -34,9 +34,9 @@ ps = invlink.(fs)
 Y = [rand(Bernoulli(p)) for p in ps]
 
 function plot_data()
-    plot(xlims=extrema(Xgrid), xticks=0:6:24)
-    plot!(Xgrid, invlink ∘ f, label="true probabilities")
-    return scatter!(X, Y, label="observations", color=3)
+    plot(; xlims=extrema(Xgrid), xticks=0:6:24)
+    plot!(Xgrid, invlink ∘ f; label="true probabilities")
+    return scatter!(X, Y; label="observations", color=3)
 end
 
 plot_data()

From 82d46952c7270c531ea3fa327517b30b8103ccaa Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 18:16:36 +0300
Subject: [PATCH 49/60] remove `@ref` that does not work

---
 src/laplace.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index d33b16e2..91c8c3c1 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -250,7 +250,9 @@ For demonstration purposes: returns an array of all the intermediate
 approximations of each Newton step.
 
 If you are only interested in the actual posterior, use
-[`posterior(::LaplaceApproximation, ::LatentFiniteGP, ::AbstractArray)`](@ref).
+`posterior(::LaplaceApproximation, ...`.
+
+TODO figure out how to get the `@ref` to work...
 """
 function laplace_steps(lfx::LatentFiniteGP, ys; newton_kwargs...)
     dist_y_given_f, K, newton_kwargs = _check_laplace_inputs(lfx, ys; newton_kwargs...)

From aca90b151c7d8c2863d1b2d6047193b5b6abba04 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 18:16:51 +0300
Subject: [PATCH 50/60] cleanup

---
 examples/c-comparisons/script.jl | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index c7256bb0..5409d5bd 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -9,11 +9,11 @@
 # ## Setup
 
 using ApproximateGPs
-using ParameterHandling
-using Zygote
-using Distributions
-using LogExpFunctions
 using LinearAlgebra
+using Distributions
+using LogExpFunctions: logistic
+#using ParameterHandling
+using Zygote
 using Optim
 
 using Plots
@@ -28,8 +28,7 @@ Xgrid = -4:0.1:29
 X = range(0, 23.5; length=48)
 f(x) = 3 * sin(10 + 0.6x) + sin(0.1x) - 1
 fs = f.(X)
-# invlink = normcdf
-invlink = logistic
+invlink = logistic  # could use other invlink, e.g. normcdf(f) = cdf(Normal(), f)
 ps = invlink.(fs)
 Y = [rand(Bernoulli(p)) for p in ps]
 

From 02b528babdd90864ca6da23d5c5ae6700c1eb51b Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 18:17:10 +0300
Subject: [PATCH 51/60] make use of closure fields

---
 examples/c-comparisons/script.jl | 2 +-
 test/laplace.jl                  | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 5409d5bd..2e30c57c 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -87,7 +87,7 @@ lf2.f.kernel
 
 # Plot samples from approximate posterior for optimised hyperparameters
 
-f_post2 = posterior(LaplaceApproximation(), lf2(X), Y)
+f_post2 = posterior(LaplaceApproximation(; f_init=objective.f), lf2(X), Y)
 
 p2 = plot_data()
 plot_samples!(Xgrid, f_post2)
diff --git a/test/laplace.jl b/test/laplace.jl
index 69a06494..b42764f8 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -28,9 +28,8 @@ function optimize_elbo(
     newton_warmstart=true,
     newton_callback=nothing,
 )
-    f = similar(xs, length(xs))  # will be mutated in-place to "warm-start" the Newton steps
-    objective = build_laplace_objective!(
-        f, build_latent_gp, xs, ys; newton_warmstart, newton_callback
+    objective = build_laplace_objective(
+        build_latent_gp, xs, ys; newton_warmstart, newton_callback
     )
     objective_grad(θ) = only(Zygote.gradient(objective, θ))
 
@@ -39,7 +38,7 @@ function optimize_elbo(
     )
 
     lf = build_latent_gp(training_results.minimizer)
-    f_post = posterior(LaplaceApproximation(; f_init=f), lf(xs), ys)
+    f_post = posterior(LaplaceApproximation(; f_init=objective.f), lf(xs), ys)
     return f_post, training_results
 end
 

From e788f4ca0614a0908ee152a000790e91aa97d75d Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Thu, 23 Sep 2021 23:27:55 +0300
Subject: [PATCH 52/60] yaf

---
 examples/c-comparisons/script.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 0627e4a8..f7d2035b 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -11,7 +11,7 @@
 using ApproximateGPs
 using LinearAlgebra
 using Distributions
-using LogExpFunctions: logistic
+using LogExpFunctions: logistic, softplus
 #using ParameterHandling
 using Zygote
 using Optim

From ca68222dc1fb2b3b08aedfa349c2cf9a3b0863cf Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 10:41:56 +0300
Subject: [PATCH 53/60] improved type stability

---
 examples/c-comparisons/script.jl |  8 +++----
 src/laplace.jl                   | 37 +++++++++++++++++++++-----------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index f7d2035b..0cfaab1f 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -11,7 +11,7 @@
 using ApproximateGPs
 using LinearAlgebra
 using Distributions
-using LogExpFunctions: logistic, softplus
+using LogExpFunctions: logistic, softplus, invsoftplus
 #using ParameterHandling
 using Zygote
 using Optim
@@ -28,7 +28,7 @@ Xgrid = -4:0.1:29
 X = range(0, 23.5; length=48)
 f(x) = 3 * sin(10 + 0.6x) + sin(0.1x) - 1
 fs = f.(X)
-invlink = logistic  # could use other invlink, e.g. normcdf(f) = cdf(Normal(), f)
+const invlink = logistic  # could use other invlink, e.g. normcdf(f) = cdf(Normal(), f)
 ps = invlink.(fs)
 Y = [rand(Bernoulli(p)) for p in ps]
 
@@ -42,7 +42,7 @@ plot_data()
 
 # ## Creating the latent GP
 
-dist_y_given_f(f) = Bernoulli(invlink(f))
+const dist_y_given_f(f) = Bernoulli(invlink(f))
 
 function build_latent_gp(theta)
     variance = softplus(theta[1])
@@ -60,7 +60,7 @@ end
 
 # Initialise the hyperparameters
 
-theta0 = [0.0, 3.0]
+theta0 = [invsoftplus(1.0), invsoftplus(5.0)]
 
 lf = build_latent_gp(theta0)
 
diff --git a/src/laplace.jl b/src/laplace.jl
index 91c8c3c1..ead20fc1 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -2,6 +2,17 @@
 ignore_ad(closure) = closure()
 @non_differentiable ignore_ad(closure)
 
+struct LaplaceCache{Tm<:AbstractMatrix,Tv<:AbstractVector,Td<:Diagonal,Tf<:Real,Tc<:Cholesky}
+    K::Tm
+    f::Tv
+    W::Td
+    Wsqrt::Td
+    loglik::Tf
+    d_loglik::Tv
+    B_ch::Tc
+    a::Tv
+end
+
 function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     # Ψ = log p(y|f) + log p(f)
     #   = loglik + log p(f)
@@ -20,23 +31,22 @@ function _laplace_train_intermediates(dist_y_given_f, ys, K, f)
     b = W * f + d_ll
     a = b - Wsqrt * (B_ch \ (Wsqrt * K * b))
 
-    return (; K, f, W, Wsqrt, loglik=ll, d_loglik=d_ll, B_ch, a)
+    #return (; K, f, W, Wsqrt, loglik=ll, d_loglik=d_ll, B_ch, a)
+    return LaplaceCache(K, f, W, Wsqrt, ll, d_ll, B_ch, a)
 end
 
 # dist_y_given_f(f) = Bernoulli(logistic(f))
 
 function loglik_and_derivs(dist_y_given_f, ys::AbstractVector, f::AbstractVector{<:Real})
-    function per_observation(fhat, y)
-        ll(f) = logpdf(dist_y_given_f(f), y)
-        d_ll(f) = ForwardDiff.derivative(ll, f)
-        d2_ll(f) = ForwardDiff.derivative(d_ll, f)
-        return ll(fhat), d_ll(fhat), d2_ll(fhat)
-    end
-    vec_of_l_d_d2 = map(per_observation, f, ys)
-    ls = map(res -> res[1], vec_of_l_d_d2)
+    l(_f, _y) = logpdf(dist_y_given_f(_f), _y)
+    dl(_f, _y) = ForwardDiff.derivative(__f -> l(__f, _y), _f)
+    d2l(_f, _y) = ForwardDiff.derivative(__f -> dl(__f, _y), _f)
+
+    ls = map(l, f, ys)
+    d_ll = map(dl, f, ys)
+    d2_ll = map(d2l, f, ys)
+
     ll = sum(ls)
-    d_ll = map(res -> res[2], vec_of_l_d_d2)
-    d2_ll = map(res -> res[3], vec_of_l_d_d2)
     return ll, d_ll, d2_ll
 end
 
@@ -55,8 +65,9 @@ function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=not
     f = f_init
     cache = nothing
     for i in 1:maxiter
+        f_debug = f[1:3]  # needs to be outside the ignore_ad() for type stability
         ignore_ad() do
-            @debug "  - Newton iteration $i: f[1:3]=$(f[1:3])"
+            @debug "  - Newton iteration $i: f[1:3]=$f_debug"
         end
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
         if !isnothing(callback)
@@ -272,7 +283,7 @@ struct LaplaceApproximation{Tkw}
     newton_kwargs::Tkw
 end
 
-LaplaceApproximation(; newton_kwargs...) = LaplaceApproximation(newton_kwargs)
+LaplaceApproximation(; newton_kwargs...) = LaplaceApproximation((; newton_kwargs...))
 
 function approx_lml(la::LaplaceApproximation, lfx::LatentFiniteGP, ys)
     return laplace_lml(lfx, ys; la.newton_kwargs...)

From 9144ed34e83956e2b09fa6d9bea9c8dc29f9824b Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 10:58:22 +0300
Subject: [PATCH 54/60] replace QuadGK with Gauss-Hermite

---
 Project.toml          |  2 --
 src/ApproximateGPs.jl |  1 -
 src/ep.jl             | 16 ++++++++++------
 test/Project.toml     |  2 ++
 test/runtests.jl      |  7 +++++++
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/Project.toml b/Project.toml
index 631e032e..6028e8f7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,7 +13,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 KLDivergences = "3c9cd921-3d3f-41e2-830c-e020174918cc"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -28,7 +27,6 @@ FillArrays = "0.12"
 ForwardDiff = "0.10"
 GPLikelihoods = "0.1, 0.2"
 KLDivergences = "0.2.1"
-QuadGK = "2"
 Reexport = "1"
 SpecialFunctions = "1"
 StatsBase = "0.33"
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 3d4824fc..4ec40fc8 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -23,7 +23,6 @@ include("expected_loglik.jl")
 include("elbo.jl")
 
 using ForwardDiff
-using QuadGK  # TODO replace with FastGaussQuadrature
 
 export LaplaceApproximation
 export laplace_lml, build_laplace_objective, build_laplace_objective!
diff --git a/src/ep.jl b/src/ep.jl
index 45528431..1d878f43 100644
--- a/src/ep.jl
+++ b/src/ep.jl
@@ -53,12 +53,16 @@ function epsite_pdf(site, f)
     return site.Z * pdf(epsite_dist(site), f)
 end
 
-function moment_match(cav_i::UnivariateDistribution, lik_eval_i)
-    lower = mean(cav_i) - 20 * std(cav_i)
-    upper = mean(cav_i) + 20 * std(cav_i)
-    m0, _ = quadgk(f -> pdf(cav_i, f) * lik_eval_i(f), lower, upper)
-    m1, _ = quadgk(f -> f * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
-    m2, _ = quadgk(f -> f^2 * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+function moment_match(cav_i::Union{Normal,NormalCanon}, lik_eval_i; n_points=20)
+    xs, ws = gausshermite(n_points)
+    fs = √2 * std(cav_i) * xs .+ mean(cav_i)
+    scale = (1 / √π)
+    lik_ws = lik_eval_i.(fs) .* ws
+    fs_lik_ws = fs .* lik_ws
+    fs²_lik_ws = fs .* fs_lik_ws
+    m0 = scale * sum(lik_ws)
+    m1 = scale * sum(fs_lik_ws)
+    m2 = scale * sum(fs²_lik_ws)
     matched_Z = m0
     matched_mean = m1 / m0
     matched_var = m2 / m0 - matched_mean^2
diff --git a/test/Project.toml b/test/Project.toml
index f8c698e4..fc789273 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
+QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
@@ -27,4 +28,5 @@ IterTools = "1"
 LogExpFunctions = "0.3"
 Optim = "1"
 PDMats = "0.11"
+QuadGK = "2"
 Zygote = "0.6"
diff --git a/test/runtests.jl b/test/runtests.jl
index c9c4f91f..fcda3374 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -14,6 +14,7 @@ using Zygote
 using ChainRulesCore
 using ChainRulesTestUtils
 using FiniteDifferences
+using QuadGK
 
 const GROUP = get(ENV, "GROUP", "All")
 const PKGDIR = dirname(dirname(pathof(ApproximateGPs)))
@@ -44,4 +45,10 @@ include("test_utils.jl")
         println(" ")
         @info "Ran laplace tests"
     end
+
+    @testset "EP" begin
+        include("ep.jl")
+        println(" ")
+        @info "Ran ep tests"
+    end
 end

From 771ef3eb0af804282d96c512fc2b467ed884e927 Mon Sep 17 00:00:00 2001
From: st-- <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 10:59:26 +0300
Subject: [PATCH 55/60] Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 src/laplace.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index ead20fc1..7a713443 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -2,7 +2,9 @@
 ignore_ad(closure) = closure()
 @non_differentiable ignore_ad(closure)
 
-struct LaplaceCache{Tm<:AbstractMatrix,Tv<:AbstractVector,Td<:Diagonal,Tf<:Real,Tc<:Cholesky}
+struct LaplaceCache{
+    Tm<:AbstractMatrix,Tv<:AbstractVector,Td<:Diagonal,Tf<:Real,Tc<:Cholesky
+}
     K::Tm
     f::Tv
     W::Td

From b52ef551a6cac28dfed8ac5d7d0970b631dcff12 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 11:10:12 +0300
Subject: [PATCH 56/60] more type stability cleanup

---
 src/laplace.jl | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/laplace.jl b/src/laplace.jl
index ead20fc1..4925cddc 100644
--- a/src/laplace.jl
+++ b/src/laplace.jl
@@ -62,29 +62,25 @@ function _laplace_lml(f, cache)
 end
 
 function _newton_inner_loop(dist_y_given_f, ys, K; f_init, maxiter, callback=nothing)
+    @assert maxiter >= 1
     f = f_init
     cache = nothing
     for i in 1:maxiter
-        f_debug = f[1:3]  # needs to be outside the ignore_ad() for type stability
-        ignore_ad() do
-            @debug "  - Newton iteration $i: f[1:3]=$f_debug"
-        end
+        @debug "  - Newton iteration $i: f[1:3]=$(f[1:3])"
         fnew, cache = _newton_step(dist_y_given_f, ys, K, f)
         if !isnothing(callback)
             callback(fnew, cache)
         end
 
         if isapprox(f, fnew)
-            ignore_ad() do
-                @debug "  + converged"
-            end
+            @debug "  + converged"
             #f = fnew
             break  # converged
         else
             f = fnew
         end
     end
-    return f, cache
+    return f, something(cache)
 end
 
 # Currently, we have a separate function that returns only f_opt to simplify frule/rrule

From 4a694d994f90af687215eb34cd750f4ee3fe409c Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 11:15:03 +0300
Subject: [PATCH 57/60] fix test

---
 test/laplace.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index b42764f8..70603612 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -117,7 +117,7 @@ end
         return f_opt, newton_from_L_pullback
     end
 
-    fkwargs = (; f_init=zero(ys), maxiter=100)
+    fkwargs = (; f_init=zeros(length(ys)), maxiter=100)
     test_frule(newton_inner_loop_from_L, dist_y_given_f, ys, L; fkwargs)
     test_rrule(newton_inner_loop_from_L, dist_y_given_f, ys, L; fkwargs)
 end

From 62840adc9c7449c851386abd62f826b345ccb480 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 11:15:07 +0300
Subject: [PATCH 58/60] add missing test file

---
 test/ep.jl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 test/ep.jl

diff --git a/test/ep.jl b/test/ep.jl
new file mode 100644
index 00000000..95a1638b
--- /dev/null
+++ b/test/ep.jl
@@ -0,0 +1,21 @@
+@testset "moment_match" begin
+    function moment_match_quadgk(cav_i::UnivariateDistribution, lik_eval_i)
+        lower = mean(cav_i) - 20 * std(cav_i)
+        upper = mean(cav_i) + 20 * std(cav_i)
+        m0, _ = quadgk(f -> pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+        m1, _ = quadgk(f -> f * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+        m2, _ = quadgk(f -> f^2 * pdf(cav_i, f) * lik_eval_i(f), lower, upper)
+        matched_Z = m0
+        matched_mean = m1 / m0
+        matched_var = m2 / m0 - matched_mean^2
+        return (; Z=matched_Z, q=Normal(matched_mean, sqrt(matched_var)))
+    end
+
+    cav_i = Normal(0.8231, 3.213622)
+    lik_eval_i = f -> pdf(Bernoulli(logistic(f)), true)
+    Z_gh, q_gh = ApproximateGPs.moment_match(cav_i, lik_eval_i; n_points=100)
+    Z_quad, q_quad = moment_match_quadgk(cav_i, lik_eval_i)
+    @test Z_gh ≈ Z_quad
+    @test mean(q_gh) ≈ mean(q_quad)
+    @test std(q_gh) ≈ std(q_quad)
+end

From 6f7a5ba2c926c21ca453fdf12ef3e8cd92408308 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 16:53:08 +0300
Subject: [PATCH 59/60] more explanation on the example script

---
 examples/c-comparisons/script.jl | 80 ++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/examples/c-comparisons/script.jl b/examples/c-comparisons/script.jl
index 0cfaab1f..2380d55a 100644
--- a/examples/c-comparisons/script.jl
+++ b/examples/c-comparisons/script.jl
@@ -1,10 +1,10 @@
-# # Binary Classification with Various Approximations
-# 
+# # Binary Classification with Laplace approximation
 #
 # This example demonstrates how to carry out non-conjugate Gaussian process
-# inference using different approximations.
-# For a basic introduction to the functionality of this library, please refer to the
-# [User Guide](@ref).
+# inference using the Laplace approximation.
+#
+# For a basic introduction to the functionality of this library, please refer
+# to the [User Guide](@ref).
 #
 # ## Setup
 
@@ -23,14 +23,16 @@ using Random
 Random.seed!(1);
 
 # ## Generate some training data
+#
+# We create a binary-labelled toy dataset:
 
-Xgrid = -4:0.1:29
-X = range(0, 23.5; length=48)
-f(x) = 3 * sin(10 + 0.6x) + sin(0.1x) - 1
-fs = f.(X)
+Xgrid = -4:0.1:29  # for visualization
+X = range(0, 23.5; length=48)  # training inputs
+f(x) = 3 * sin(10 + 0.6x) + sin(0.1x) - 1  # latent function
+fs = f.(X)  # latent function values at training inputs
 const invlink = logistic  # could use other invlink, e.g. normcdf(f) = cdf(Normal(), f)
-ps = invlink.(fs)
-Y = [rand(Bernoulli(p)) for p in ps]
+ps = invlink.(fs)  # probabilities at the training inputs
+Y = [rand(Bernoulli(p)) for p in ps]  # observations at the training inputs
 
 function plot_data()
     plot(; xlims=extrema(Xgrid), xticks=0:6:24)
@@ -41,24 +43,30 @@ end
 plot_data()
 
 # ## Creating the latent GP
-
-const dist_y_given_f(f) = Bernoulli(invlink(f))
+# 
+# Here we write a function that creates our latent GP prior, given the
+# hyperparameter vector `theta`. Compared to a "vanilla" GP, the `LatentGP`
+# requires a function or functor that maps from the latent GP `f` to the
+# distribution of observations `y`. This functor is commonly called "the
+# likelihood".
 
 function build_latent_gp(theta)
+    # `theta` is unconstrained, but kernel variance and lengthscale must be positive:
     variance = softplus(theta[1])
     lengthscale = softplus(theta[2])
+
     kernel = variance * with_lengthscale(SqExponentialKernel(), lengthscale)
-    return LatentGP(GP(kernel), dist_y_given_f, 1e-8)
-end
+    
+    dist_y_given_f = BernoulliLikelihood()  # has logistic invlink by default
+    # We could also be explicit and define it as a function:
+    # dist_y_given_f(f) = Bernoulli(invlink(f))
 
-function plot_samples!(Xgrid, fpost; samples=100, color=2)
-    fx = fpost(Xgrid, 1e-8)
-    fsamples = rand(fx, samples)
-    plot!(Xgrid, invlink.(fsamples); color, alpha=0.2, label="")
-    return plot!(Xgrid, invlink.(mean(fx)); color, lw=2, label="posterior fit")
+    jitter = 1e-8  # required for numeric stability [TODO: where to explain this better?]
+    return LatentGP(GP(kernel), dist_y_given_f, jitter)
 end
 
-# Initialise the hyperparameters
+# We define a latent GP at our initial hyperparameter values, here with
+# variance 1.0 and lengthscale 5.0:
 
 theta0 = [invsoftplus(1.0), invsoftplus(5.0)]
 
@@ -66,28 +74,52 @@ lf = build_latent_gp(theta0)
 
 lf.f.kernel
 
-# Plot samples from approximate posterior
+# We can now compute the posterior `p(f | y)` under the Laplace approximation:
 
 f_post = posterior(LaplaceApproximation(), lf(X), Y)
 
+# Let's plot samples from this approximate posterior:
+
+function plot_samples!(Xgrid, fpost; samples=100, color=2)
+    fx = fpost(Xgrid, 1e-8)
+    fsamples = rand(fx, samples)
+    plot!(Xgrid, invlink.(fsamples); color, alpha=0.2, label="")
+    return plot!(Xgrid, invlink.(mean(fx)); color, lw=2, label="posterior fit")
+end
+
 p1 = plot_data()
 plot_samples!(Xgrid, f_post)
 
+# We can improve this fit by optimising the hyperparameters.
+
 # ## Optimise the parameters
+#
+# ApproximateGPs provides a convenience function `build_laplace_objective` that
+# constructs an objective function for optimising the hyperparameters, based on
+# the Laplace approximation to the log marginal likelihood.
+# We pass this objective to Optim.jl's LBFGS optimiser:
 
-objective = build_laplace_objective(build_latent_gp, X, Y; newton_warmstart=true)
+objective = build_laplace_objective(build_latent_gp, X, Y)
 
 training_results = Optim.optimize(
     objective, θ -> only(Zygote.gradient(objective, θ)), theta0, LBFGS(); inplace=false
 )
 
+# Now that we have (hopefully) better hyperparameter values, we need to construct a LatentGP prior with these values:
+
 lf2 = build_latent_gp(training_results.minimizer)
 
 lf2.f.kernel
 
-# Plot samples from approximate posterior for optimised hyperparameters
+# Finally, we need to obtain the posterior given the observations again:
 
 f_post2 = posterior(LaplaceApproximation(; f_init=objective.f), lf2(X), Y)
 
+# `f_init=objective.f` let's the Laplace approximation "warm-start" at the last
+# point of the inner-loop Newton optimisation; `objective.f` is a field on the
+# `objective` closure.
+
+# Let's plot samples from the approximate posterior for the optimised hyperparameters:
+
 p2 = plot_data()
 plot_samples!(Xgrid, f_post2)

From 5445a95c784542aea26f1e021f8d8f1b478fede1 Mon Sep 17 00:00:00 2001
From: ST John <st--@users.noreply.github.com>
Date: Fri, 24 Sep 2021 16:54:20 +0300
Subject: [PATCH 60/60] fix test seed

---
 test/laplace.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/laplace.jl b/test/laplace.jl
index 70603612..3e09eb24 100644
--- a/test/laplace.jl
+++ b/test/laplace.jl
@@ -43,12 +43,14 @@ function optimize_elbo(
 end
 
 @testset "Gaussian" begin
-    # should check for convergence in one step, and agreement with exact GPR
+    # TODO: check for convergence in one step, and agreement with exact GPR
+    # move to test/equivalences.jl?
 end
 
 @testset "gradients" begin
     X, Y = generate_data()
     @testset "laplace_lml" begin
+        Random.seed!(123)
         theta0 = rand(2)
         function objective(theta)
             lf = build_latent_gp(theta)
@@ -62,6 +64,8 @@ end
 end
 
 @testset "chainrule" begin
+    Random.seed!(54321)
+
     xs = [0.2, 0.3, 0.7]
     ys = [1, 1, 0]
     L = randn(3, 3)