Skip to content

Commit

Permalink
fixes old faithful example
Browse files Browse the repository at this point in the history
  • Loading branch information
MArpogaus committed Feb 6, 2024
1 parent fb6b120 commit 31a7059
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 38 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/of_cml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ jobs:
echo '# Old Faithful' >> report.md
echo '## Learning Curve' >> report.md
echo '![](metrics/old_faithful/of_hist.png)' >> report.md
echo '![](artifacts/old_faithful/of_hist.png)' >> report.md
echo '## Metrics' >> report.md
cat metrics/old_faithful/of_metrics.txt >> report.md
echo '## Parameter Vector' >> report.md
cat metrics/old_faithful/of_pvector.txt >> report.md
cat artifacts/old_faithful/of_pvector.txt >> report.md
echo '## Flow' >> report.md
# Embed the flow plot as a Markdown image; `cat` would dump raw PNG bytes into the report.
echo '![](artifacts/old_faithful/of_flow.png)' >> report.md
echo '## Results' >> report.md
echo '![](metrics/old_faithful/of_dist.png)' >> report.md
echo '![](artifacts/old_faithful/of_dist.png)' >> report.md
cml comment create report.md
77 changes: 43 additions & 34 deletions cml/old_faithful.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,57 @@
# author : Marcel Arpogaus <marcel dot arpogaus at gmail dot com>
#
# created : 2021-03-22 11:14:00 (Marcel Arpogaus)
# changed : 2023-02-07 08:31:36 (Marcel Arpogaus)
# changed : 2024-02-06 14:42:02 (Marcel Arpogaus)
# DESCRIPTION ############################################################
# ...
# LICENSE ################################################################
# ...
##########################################################################

# %% Imports
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import Dense, InputLayer

from bernstein_flow.distributions import BernsteinFlow
from bernstein_flow.util.visualization import plot_flow
from tensorflow.keras.layers import Dense, InputLayer

# Ensure Reproducibility
# %% Ensure Reproducibility
np.random.seed(2)
tf.random.set_seed(2)
print("TFP Version", tfp.__version__)
print("TF Version", tf.__version__)


# Function Definitions
def negloglik(y_true, y_hat):
nll = -y_hat.log_prob(y_true)
# %% Function Definitions
def bnf(pv):
    """Build a ``BernsteinFlow`` distribution from the parameter vector ``pv``.

    The flow is created through ``BernsteinFlow.new`` with data scaling and
    data shifting enabled, base-distribution scaling disabled, and
    extrapolation enabled.

    Args:
        pv: Parameter vector handed to ``BernsteinFlow.new``; in this script
            it is the output of ``flow_model``.

    Returns:
        The object returned by ``BernsteinFlow.new``.
    """
    flow_options = {
        "scale_data": True,
        "shift_data": True,
        "scale_base_distribution": False,
        "extrapolation": True,
    }
    return BernsteinFlow.new(pv, **flow_options)


def negloglik(y_true, pv):
    """Return the negative log-likelihood of ``y_true`` under the flow built from ``pv``.

    Used as the loss passed to ``flow_model.compile``.

    Args:
        y_true: Observed target values.
        pv: Parameter vector from which ``bnf`` constructs the flow.

    Returns:
        The negated result of the flow's ``log_prob(y_true)``.
    """
    flow = bnf(pv)
    return -flow.log_prob(y_true)


# Data extracted from
# %% Data extracted from
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/faithful.html.
#
# Reference: A. Azzalini and A. W. Bowman, “A Look at Some Data on the
# Old Faithful Geyser,” Journal of the Royal Statistical Society. Series C
# (Applied Statistics), vol. 39, no. 3, pp. 357–365, 1990, doi:
# 10.2307/2347385.
# fmt: off
y = np.asarray(
(
# fmt: off
0.6694, 0.3583, 0.6667, 0.6667, 0.6667, 0.3333, 0.7306, 0.7139, 0.3389,
0.8056, 0.3056, 0.9083, 0.2694, 0.8111, 0.7306, 0.2944, 0.7778, 0.3333,
0.7889, 0.7028, 0.3167, 0.8278, 0.3333, 0.6667, 0.3333, 0.6667, 0.4722,
Expand Down Expand Up @@ -80,16 +90,16 @@ def negloglik(y_true, y_hat):
0.6667, 0.6667, 0.6667, 0.3333, 0.6667, 0.3222, 0.7222, 0.2778, 0.7944,
0.325, 0.7806, 0.3222, 0.7361, 0.3556, 0.6806, 0.3444, 0.6667, 0.6667,
0.3333,
# fmt on
),
np.float32,
)
# fmt: on


x = np.ones((y.shape[0], 1)) # We use ones to mimic unconditional data


# TensorFlow Dataset API
# %% TensorFlow Dataset API


dataset = tf.data.Dataset.from_tensor_slices((x, y))
Expand All @@ -99,7 +109,7 @@ def negloglik(y_true, y_hat):
dataset


# Fitting the Normalizing Flow to the data
# %% Fitting the Normalizing Flow to the data


bernstein_order = 9
Expand All @@ -109,10 +119,6 @@ def negloglik(y_true, y_hat):
flow_model.add(InputLayer(input_shape=(1)))
# A gigantic network could go here
flow_model.add(Dense(3 + bernstein_order))
flow_model.add(
tfp.layers.DistributionLambda(lambda pv: BernsteinFlow.from_pvector(pv))
) # <--- Replace the Normal distribution with the Transformed Distribution


flow_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)

Expand All @@ -122,43 +128,46 @@ def negloglik(y_true, y_hat):
)


# Result
result_path = "metrics/old_faithful/"
# %% Result
metrcis_path = "metrics/old_faithful/"
artifacts_path = "artifacts/old_faithful/"

if not os.path.exists(result_path):
os.makedirs(result_path)
os.makedirs(metrcis_path, exist_ok=True)
os.makedirs(artifacts_path, exist_ok=True)

hist_df = pd.DataFrame(hist.history)
hist_df.to_csv(result_path + "of_hist.csv")
hist_df.to_csv(artifacts_path + "of_hist.csv")

with open(metrcis_path + "of_metrics.txt", "w") as metrics:
metrics.write("Min of loss: " + str(hist_df.loss.min()) + "\n")

fig = hist_df.loss.plot(figsize=(16, 8)).get_figure()
fig.savefig(result_path + "of_hist.png")
fig.savefig(artifacts_path + "of_hist.png")

flow = flow_model(np.ones((1, 1), dtype="float32"))
flow = bnf(flow_model(np.ones((1, 1), dtype="float32")))

fig = plot_flow(flow)
fig.savefig(artifacts_path + "of_flow.png")

times = np.linspace(0, 1.2)
fp = flow.prob(times)

fig = plt.figure(figsize=(16, 16))
plt.hist(y, 20, density=True)
plt.plot(times, fp)
fig.savefig(result_path + "of_dist.png")

fig.savefig(artifacts_path + "of_dist.png")

with open(result_path + "of_metrics.txt", "w") as metrics:
metrics.write("Min of loss: " + str(hist_df.loss.min()) + "\n")

a2 = flow.bijector.bijector.bijectors[0].scale
thetas = flow.bijector.bijector.bijectors[1].thetas
a1 = flow.bijector.bijector.bijectors[3].scale
b1 = flow.bijector.bijector.bijectors[4].shift
thetas = flow.bijector.bijector.bijectors[0].thetas
a1 = flow.bijector.bijector.bijectors[1].scale
b1 = flow.bijector.bijector.bijectors[2].shift


with open(result_path + "of_pvector.txt", "w") as pvector:
with open(artifacts_path + "of_pvector.txt", "w") as pvector:
pvector.write(
f"""
a1 = {repr(a1.numpy().flatten())}
b1 = {repr(b1.numpy().flatten())}
thetas = {repr(thetas.numpy().flatten())}
a2 = {repr(a2.numpy().flatten())}
"""
)
2 changes: 1 addition & 1 deletion metrics/old_faithful/of_metrics.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Min of loss: -0.745232343673706
Min of loss: -0.7447453737258911

1 comment on commit 31a7059

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bimodal Model

Learning Curve

Learning Curve

Metrics

loss: -0.9322965145111084
val_loss: -0.9703801870346069

Results

Parameter Vector for x = 1

BernsteinFlow:
invert_chain_of_bpoly_of_scale1_of_shift1:
chain_of_bpoly_of_scale1_of_shift1:
bpoly: [-12.922276 -6.4072437 0.10778809 0.10861863 0.10862866
0.10863866 0.10864866 0.10865866 0.10866866 0.10867865
0.10868867 0.10869873 0.10870873 0.108719 0.22388256
1.4789325 1.5048746 2.2524326 2.9999905 ]
scale1: -0.47959089279174805
shift1: -1.3736636638641357

Flow



Bijector


Please sign in to comment.