From d04a73e6eb5fe54c4c9ae07a8fcc8d1f8263335e Mon Sep 17 00:00:00 2001
From: Kyle Stevenson <Kyle.Stevenson@stfc.ac.uk>
Date: Tue, 21 Nov 2023 09:32:09 +0000
Subject: [PATCH] Add Tensorflow-Keras Example

---
 TensorflowExample/Dockerfile.gpu  |  14 ++++
 TensorflowExample/test-MNIST.py   | 124 ++++++++++++++++++++++++++++++
 TensorflowExample/test-gpu-tf.py  |  82 ++++++++++++++++++++
 TensorflowExample/test-mlgpu.yaml |  35 +++++++++
 4 files changed, 255 insertions(+)
 create mode 100644 TensorflowExample/Dockerfile.gpu
 create mode 100644 TensorflowExample/test-MNIST.py
 create mode 100644 TensorflowExample/test-gpu-tf.py
 create mode 100644 TensorflowExample/test-mlgpu.yaml

diff --git a/TensorflowExample/Dockerfile.gpu b/TensorflowExample/Dockerfile.gpu
new file mode 100644
index 0000000..5384d97
--- /dev/null
+++ b/TensorflowExample/Dockerfile.gpu
@@ -0,0 +1,14 @@
+FROM tensorflow/tensorflow:2.1.2-gpu
+
+# Don't use a requirements file here. Let pip sort out matplotlib.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir matplotlib
+
+# Create the data paths that the scripts use when ISDAFNI is "True".
+RUN mkdir -p /data/inputs/ /data/outputs/
+
+# Copy both example scripts into the image's working directory.
+COPY test-MNIST.py test-gpu-tf.py ./
+
+# Shell form: ';' means the GPU timing test still runs even if the MNIST example fails.
+ENTRYPOINT python test-MNIST.py; python test-gpu-tf.py
diff --git a/TensorflowExample/test-MNIST.py b/TensorflowExample/test-MNIST.py
new file mode 100644
index 0000000..625e439
--- /dev/null
+++ b/TensorflowExample/test-MNIST.py
@@ -0,0 +1,124 @@
+# Run a simple machine learning classifier. Taken from the Tensorflow (KERAS) examples.
+# https://www.tensorflow.org/tutorials/keras/classification
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+
+# Helper libraries
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+gPATHI = ""
+gPATHO = ""
+# Pre-amble to setup folders
+isDAFNI = os.environ.get("ISDAFNI")
+print("ISDAFNI Environment variable = ", isDAFNI, type(isDAFNI))
+if isDAFNI == "True":
+    if os.name == "nt":
+        pren = os.environ.get("HOMEDRIVE")
+    else:
+        pren = "/"
+    gPATHI = os.path.join(pren, "data", "inputs")
+    gPATHO = os.path.join(pren, "data", "outputs")
+    print("Running within DAFNI: ", gPATHO)
+else:
+    print("Not running within DAFNI, using run directory")
+
+#physical_devices = tf.config.experimental.list_physical_devices('GPU') # tf.config.list_physical_devices('GPU')
+#for device in physical_devices:
+#    tf.config.experimental.set_memory_growth(device, True)
+
+print(tf.__version__)  # log the TensorFlow version in use
+local_dev_ps = device_lib.list_local_devices()  # devices reported by TensorFlow on this host
+for dev in local_dev_ps:
+    print(dev.name)  # one line per reported device
+
+class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+fashion_mnist = tf.keras.datasets.fashion_mnist
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+train_images = train_images/255.0
+test_images = test_images/255.0
+
+plt.figure(figsize=(10, 10))
+for i in range(25):
+    plt.subplot(5,5,i+1)
+    plt.xticks([])
+    plt.yticks([])
+    plt.grid(False)
+    plt.imshow(train_images[i], cmap=plt.cm.binary)
+    plt.xlabel(class_names[train_labels[i]])
+plt.savefig(os.path.join(gPATHO, 'clothes.png'))
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dense(10)
+])
+
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+
+history = model.fit(train_images, train_labels, epochs=10)
+print(history.history.keys())
+
+fig, ax = plt.subplots(figsize=(8, 6))
+ax.plot(history.history['loss'], label='Loss fn')
+ax.plot(history.history['accuracy'], label='Accuracy')
+plt.xlabel('Epoch')
+plt.ylabel('Loss fn/Accuracy')
+plt.legend()
+plt.savefig(os.path.join(gPATHO, 'training.png'))
+
+test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
+print('\nTest accuracy:', test_acc)
+
+probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
+predictions = probability_model.predict(test_images)
+
+def plot_image(i, predictions_array, true_label, img):
+    true_label, img = true_label[i], img[i]
+    plt.grid(False)
+    plt.xticks([])
+    plt.yticks([])
+    plt.imshow(img, cmap=plt.cm.binary)
+    predicted_label = np.argmax(predictions_array)
+    if predicted_label == true_label:
+        color = 'blue'
+    else:
+        color = 'red'
+
+    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
+                                            100*np.max(predictions_array),
+                                            class_names[true_label]),
+                                            color=color)
+
+def plot_value_array(i, predictions_array, true_label):
+    true_label = true_label[i]
+    plt.grid(False)
+    plt.xticks(range(10))
+    plt.yticks([])
+    thisplot = plt.bar(range(10), predictions_array, color="#777777")
+    plt.ylim([0, 1])
+    predicted_label = np.argmax(predictions_array)
+    thisplot[predicted_label].set_color('red')
+    thisplot[true_label].set_color('blue')
+
+# Plot the first X test images, their predicted labels, and the true labels.
+# Color correct predictions in blue and incorrect predictions in red.
+num_rows = 5
+num_cols = 3
+num_images = num_rows*num_cols
+plt.figure(figsize=(2*2*num_cols, 2*num_rows))
+for i in range(num_images):
+  plt.subplot(num_rows, 2*num_cols, 2*i+1)
+  plot_image(i, predictions[i], test_labels, test_images)
+  plt.subplot(num_rows, 2*num_cols, 2*i+2)
+  plot_value_array(i, predictions[i], test_labels)
+plt.tight_layout()
+plt.savefig(os.path.join(gPATHO, 'predictions.png'))
diff --git a/TensorflowExample/test-gpu-tf.py b/TensorflowExample/test-gpu-tf.py
new file mode 100644
index 0000000..a41405a
--- /dev/null
+++ b/TensorflowExample/test-gpu-tf.py
@@ -0,0 +1,82 @@
+# Test out Tensorflow GPU calcs vs CPU
+# Adapted from test example in :-
+# https://stackoverflow.com/questions/55749899/training-a-simple-model-in-tensorflow-gpu-slower-than-cpu
+
+import os
+import time
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+import matplotlib.pyplot as plt
+
+gPATHI = ""
+gPATHO = ""
+# Pre-amble to setup folders
+isDAFNI = os.environ.get("ISDAFNI")
+print("ISDAFNI Environment variable = ", isDAFNI, type(isDAFNI))
+if isDAFNI == "True":
+    if os.name == "nt":
+        pren = os.environ.get("HOMEDRIVE")
+    else:
+        pren = "/"
+    gPATHI = os.path.join(pren, "data", "inputs")
+    gPATHO = os.path.join(pren, "data", "outputs")
+    print("Running within DAFNI: ", gPATHO)
+else:
+    print("Not running within DAFNI, using run directory")
+
+tf.compat.v1.disable_eager_execution()
+
+local_dev_ps = device_lib.list_local_devices()
+for dv in local_dev_ps: print(dv.name)
+
+# Set matrix sizes eg. 50x50, 100x100 etc
+sizes = [50, 100, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
+
+cpu_times = []
+for size in sizes:
+    tf.compat.v1.reset_default_graph()
+    with tf.device('cpu:0'):
+        start = time.time()
+        v1 = tf.Variable(tf.random.normal((size, size)))
+        v2 = tf.Variable(tf.random.normal((size, size)))
+        op = tf.matmul(v1, v2)
+        det = tf.linalg.det(op)
+        op2 = tf.linalg.inv(op)
+    with tf.compat.v1.Session() as sess: # config=config) as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        sess.run(op)
+        sess.run(det)
+        sess.run(op2)
+    cpu_times.append(time.time() - start)
+    print('cpu time took: {0:.4f}'.format(time.time() - start))
+
+try:
+    gpu_times = []
+    for size in sizes:
+        tf.compat.v1.reset_default_graph()
+        with tf.device('gpu:0'):
+            start = time.time()
+            v1 = tf.Variable(tf.random.normal((size, size)))
+            v2 = tf.Variable(tf.random.normal((size, size)))
+            op = tf.matmul(v1, v2)
+            det = tf.linalg.det(op)
+            op2 = tf.linalg.inv(op)
+        with tf.compat.v1.Session() as sess: #config=config) as sess:
+            sess.run(tf.compat.v1.global_variables_initializer())
+            sess.run(op)
+            sess.run(det)
+            sess.run(op2)
+        gpu_times.append(time.time() - start)
+        print('gpu time took: {0:.4f}'.format(time.time() - start))
+except Exception as err:
+    # Handle case with no appropriate GPU (or Cuda not installed)
+    print("Issue running using GPU(CUDA) :", type(err).__name__, err)
+    gpu_times = [0.0] * len(sizes)
+
+fig, ax = plt.subplots(figsize=(8, 6))
+ax.plot(sizes, gpu_times, label='GPU')
+ax.plot(sizes, cpu_times, label='CPU')
+plt.xlabel('Matrix Size')
+plt.ylabel('Calc. Time (sec)')
+plt.legend()
+plt.savefig(os.path.join(gPATHO, 'times.png'))
diff --git a/TensorflowExample/test-mlgpu.yaml b/TensorflowExample/test-mlgpu.yaml
new file mode 100644
index 0000000..a1e13eb
--- /dev/null
+++ b/TensorflowExample/test-mlgpu.yaml
@@ -0,0 +1,35 @@
+kind: M
+api_version: v1beta3
+metadata:
+  display_name: Tensorflow Test Model
+  name: test-tensorflow
+  publisher: DAFNI Example Models
+  contact_point_name:  DAFNI
+  contact_point_email: info@dafni.ac.uk
+  summary: >
+    Test Tensorflow and GPU Running on DAFNI
+  description: >
+    Run Matrix & ML Example (Tensorflow-Keras example) within the DAFNI framework
+  source_code: https://github.com/dafnifacility/dafni-example-models/tree/master/TensorflowExample
+
+spec:
+  resources:
+    use_gpu: true
+  inputs:
+    parameters:
+      - name: ISDAFNI
+        title: Running on DAFNI
+        type: boolean
+        default: True
+        description: True or False
+        required: True
+  outputs:
+    datasets:
+      - name: times.png
+        type: png
+        description: >
+          A png graph showing linear algebra timing (tensorflow)
+      - name: predictions.png  # must match the file name saved by test-MNIST.py
+        type: png
+        description: >
+          A png graph showing image classification predictions (tensorflow)