From d04a73e6eb5fe54c4c9ae07a8fcc8d1f8263335e Mon Sep 17 00:00:00 2001
From: Kyle Stevenson
Date: Tue, 21 Nov 2023 09:32:09 +0000
Subject: [PATCH] Add Tensorflow-Keras Example

---
 TensorflowExample/Dockerfile.gpu  |  14 ++++
 TensorflowExample/test-MNIST.py   | 124 ++++++++++++++++++++++++++++++
 TensorflowExample/test-gpu-tf.py  |  82 ++++++++++++++++++++
 TensorflowExample/test-mlgpu.yaml |  35 +++++++++
 4 files changed, 255 insertions(+)
 create mode 100644 TensorflowExample/Dockerfile.gpu
 create mode 100644 TensorflowExample/test-MNIST.py
 create mode 100644 TensorflowExample/test-gpu-tf.py
 create mode 100644 TensorflowExample/test-mlgpu.yaml

diff --git a/TensorflowExample/Dockerfile.gpu b/TensorflowExample/Dockerfile.gpu
new file mode 100644
index 0000000..5384d97
--- /dev/null
+++ b/TensorflowExample/Dockerfile.gpu
@@ -0,0 +1,14 @@
+FROM tensorflow/tensorflow:2.1.2-gpu
+
+# Don't use a requirements file here. Let pip sort out matplotlib.
+RUN pip install --upgrade pip
+RUN pip install matplotlib
+
+# Create the data paths.
+RUN mkdir -p /data/inputs/
+RUN mkdir -p /data/outputs/
+
+COPY test-MNIST.py .
+COPY test-gpu-tf.py .
+
+ENTRYPOINT python test-MNIST.py; python test-gpu-tf.py
diff --git a/TensorflowExample/test-MNIST.py b/TensorflowExample/test-MNIST.py
new file mode 100644
index 0000000..625e439
--- /dev/null
+++ b/TensorflowExample/test-MNIST.py
@@ -0,0 +1,124 @@
+# Run a simple machine learning classifier. Taken from the Tensorflow (KERAS) examples.
+# https://www.tensorflow.org/tutorials/keras/classification
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+
+# Helper libraries
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+gPATHI = ""
+gPATHO = ""
+# Preamble: use <root>/data/inputs and <root>/data/outputs when ISDAFNI is "True", else the run directory
+isDAFNI = os.environ.get("ISDAFNI")
+print("ISDAFNI Environment variable = ", isDAFNI, type(isDAFNI))
+if isDAFNI == "True":
+    if os.name == "nt":
+        pren = os.environ.get("HOMEDRIVE")
+    else:
+        pren = "/"
+    gPATHI = os.path.join(pren, "data", "inputs")
+    gPATHO = os.path.join(pren, "data", "outputs")
+    print("Running within DAFNI: ", gPATHO)
+else:
+    print("Not running within DAFNI, using run directory")
+
+#physical_devices = tf.config.experimental.list_physical_devices('GPU') # tf.config.list_physical_devices('GPU')
+#for device in physical_devices:
+#    tf.config.experimental.set_memory_growth(device, True)
+
+print(tf.__version__)
+local_dev_ps = device_lib.list_local_devices()
+for dev in local_dev_ps:
+    print(dev.name)
+
+class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+fashion_mnist = tf.keras.datasets.fashion_mnist
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+train_images = train_images/255.0
+test_images = test_images/255.0
+
+plt.figure(figsize=(10, 10))
+for i in range(25):
+    plt.subplot(5,5,i+1)
+    plt.xticks([])
+    plt.yticks([])
+    plt.grid(False)
+    plt.imshow(train_images[i], cmap=plt.cm.binary)
+    plt.xlabel(class_names[train_labels[i]])
+plt.savefig(os.path.join(gPATHO, 'clothes.png'))
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dense(10)
+])
+
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+
+history = model.fit(train_images, train_labels, epochs=10)
+print(history.history.keys())
+
+fig, ax = plt.subplots(figsize=(8, 6))
+ax.plot(history.history['loss'], label='Loss fn')
+ax.plot(history.history['accuracy'], label='Accuracy')
+plt.xlabel('Epoch')
+plt.ylabel('Loss fn/Accuracy')
+plt.legend()
+plt.savefig(os.path.join(gPATHO, 'training.png'))
+
+test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
+print('\nTest accuracy:', test_acc)
+
+probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
+predictions = probability_model.predict(test_images)
+
+def plot_image(i, predictions_array, true_label, img):
+    true_label, img = true_label[i], img[i]
+    plt.grid(False)
+    plt.xticks([])
+    plt.yticks([])
+    plt.imshow(img, cmap=plt.cm.binary)
+    predicted_label = np.argmax(predictions_array)
+    if predicted_label == true_label:
+        color = 'blue'
+    else:
+        color = 'red'
+
+    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
+                                         100*np.max(predictions_array),
+                                         class_names[true_label]),
+               color=color)
+
+def plot_value_array(i, predictions_array, true_label):
+    true_label = true_label[i]
+    plt.grid(False)
+    plt.xticks(range(10))
+    plt.yticks([])
+    thisplot = plt.bar(range(10), predictions_array, color="#777777")
+    plt.ylim([0, 1])
+    predicted_label = np.argmax(predictions_array)
+    thisplot[predicted_label].set_color('red')
+    thisplot[true_label].set_color('blue')
+
+# Plot the first X test images, their predicted labels, and the true labels.
+# Color correct predictions in blue and incorrect predictions in red.
+num_rows = 5
+num_cols = 3
+num_images = num_rows*num_cols
+plt.figure(figsize=(2*2*num_cols, 2*num_rows))
+for i in range(num_images):
+    plt.subplot(num_rows, 2*num_cols, 2*i+1)
+    plot_image(i, predictions[i], test_labels, test_images)
+    plt.subplot(num_rows, 2*num_cols, 2*i+2)
+    plot_value_array(i, predictions[i], test_labels)
+plt.tight_layout()
+plt.savefig(os.path.join(gPATHO, 'predictions.png'))
diff --git a/TensorflowExample/test-gpu-tf.py b/TensorflowExample/test-gpu-tf.py
new file mode 100644
index 0000000..a41405a
--- /dev/null
+++ b/TensorflowExample/test-gpu-tf.py
@@ -0,0 +1,82 @@
+# Test out Tensorflow GPU calcs vs CPU
+# Adapted from test example in :-
+# https://stackoverflow.com/questions/55749899/training-a-simple-model-in-tensorflow-gpu-slower-than-cpu
+
+import os
+import time
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+import matplotlib.pyplot as plt
+
+gPATHI = ""
+gPATHO = ""
+# Preamble: use <root>/data/inputs and <root>/data/outputs when ISDAFNI is "True", else the run directory
+isDAFNI = os.environ.get("ISDAFNI")
+print("ISDAFNI Environment variable = ", isDAFNI, type(isDAFNI))
+if isDAFNI == "True":
+    if os.name == "nt":
+        pren = os.environ.get("HOMEDRIVE")
+    else:
+        pren = "/"
+    gPATHI = os.path.join(pren, "data", "inputs")
+    gPATHO = os.path.join(pren, "data", "outputs")
+    print("Running within DAFNI: ", gPATHO)
+else:
+    print("Not running within DAFNI, using run directory")
+
+tf.compat.v1.disable_eager_execution()
+
+local_dev_ps = device_lib.list_local_devices()
+for dv in local_dev_ps: print(dv.name)
+
+# Set matrix sizes e.g. 50x50, 100x100 etc
+sizes = [50, 100, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
+
+cpu_times = []
+for size in sizes:
+    tf.compat.v1.reset_default_graph()
+    with tf.device('cpu:0'):
+        start = time.time()
+        v1 = tf.Variable(tf.random.normal((size, size)))
+        v2 = tf.Variable(tf.random.normal((size, size)))
+        op = tf.matmul(v1, v2)
+        det = tf.linalg.det(op)
+        op2 = tf.linalg.inv(op)
+        with tf.compat.v1.Session() as sess: # config=config) as sess:
+            sess.run(tf.compat.v1.global_variables_initializer())
+            sess.run(op)
+            sess.run(det)
+            sess.run(op2)
+        cpu_times.append(time.time() - start)
+        print('cpu time took: {0:.4f}'.format(time.time() - start))
+
+try:
+    gpu_times = []
+    for size in sizes:
+        tf.compat.v1.reset_default_graph()
+        with tf.device('gpu:0'):
+            start = time.time()
+            v1 = tf.Variable(tf.random.normal((size, size)))
+            v2 = tf.Variable(tf.random.normal((size, size)))
+            op = tf.matmul(v1, v2)
+            det = tf.linalg.det(op)
+            op2 = tf.linalg.inv(op)
+            with tf.compat.v1.Session() as sess: #config=config) as sess:
+                sess.run(tf.compat.v1.global_variables_initializer())
+                sess.run(op)
+                sess.run(det)
+                sess.run(op2)
+            gpu_times.append(time.time() - start)
+            print('gpu time took: {0:.4f}'.format(time.time() - start))
+except Exception as err:
+    # Handle case with no appropriate GPU (or CUDA not installed)
+    print("Issue running using GPU(CUDA) :", type(err).__name__, err)
+    gpu_times = [0.0] * len(sizes)
+
+fig, ax = plt.subplots(figsize=(8, 6))
+ax.plot(sizes, gpu_times, label='GPU')
+ax.plot(sizes, cpu_times, label='CPU')
+plt.xlabel('Matrix Size')
+plt.ylabel('Calc. Time (sec)')
+plt.legend()
+plt.savefig(os.path.join(gPATHO, 'times.png'))
diff --git a/TensorflowExample/test-mlgpu.yaml b/TensorflowExample/test-mlgpu.yaml
new file mode 100644
index 0000000..a1e13eb
--- /dev/null
+++ b/TensorflowExample/test-mlgpu.yaml
@@ -0,0 +1,35 @@
+kind: M
+api_version: v1beta3
+metadata:
+  display_name: Tensorflow Test Model
+  name: test-tensorflow
+  publisher: DAFNI Example Models
+  contact_point_name: DAFNI
+  contact_point_email: info@dafni.ac.uk
+  summary: >
+    Test Tensorflow and GPU Running on DAFNI
+  description: >
+    Run Matrix & ML Example (Tensorflow-Keras example) within the DAFNI framework
+  source_code: https://github.com/dafnifacility/dafni-example-models/tree/master/TensorflowExample
+
+spec:
+  resources:
+    use_gpu: true
+  inputs:
+    parameters:
+      - name: ISDAFNI
+        title: Running on DAFNI
+        type: boolean
+        default: True
+        description: True or False
+        required: True
+  outputs:
+    datasets:
+      - name: times.png
+        type: png
+        description: >
+          A png graph showing linear algebra timing (tensorflow)
+      - name: predictions.png
+        type: png
+        description: >
+          A png graph showing image classification predictions (tensorflow)