# 👨‍🔬 EthAIAuditHub Experiment Workflow
# (GitHub Actions workflow definition)
name: 👨‍🔬 EthAIAuditHub Experiment Workflow

# Manually-triggered workflow; every run is parameterised by the inputs below.
on:
  workflow_dispatch:
    inputs:
      name:
        description: '📗 Enter a name for your notebook (ex: output_notebook.ipynb )'
        required: true
      notebook:
        description: "📒 Select the notebook you need to run."
        default: "Choose a Notebook"
        type: choice
        options:
          - "Choose a Notebook"
          - "ethaiaudithubTest.ipynb"
          - "ethaiaudithubExtended.ipynb"
      demoMetric1:
        description: '📐 Select Metric1'
        default: "Choose Metric1"
        type: choice
        options:
          - "Choose Metric1"
          - "DatasetMetric"
          - "BinaryLabelDatasetMetric"
          - "ClassificationMetric"
          - "SampleDistortionMetric"
          - "MDSSClassificationMetric"
      demoMetric2:
        description: '📐 Select if you need to test with a second metric'
        default: "Choose Metric2"
        type: choice
        options:
          - "Choose Metric2"
          - "DatasetMetric"
          - "BinaryLabelDatasetMetric"
          - "ClassificationMetric"
          - "SampleDistortionMetric"
          - "MDSSClassificationMetric"
      demoDataset:
        description: '📊 Select Dataset'
        default: "Choose Dataset"
        type: choice
        options:
          - "Choose Dataset"
          - "AdultDataset"
          - "BankDataset"
          - "CompasDataset"
          - "GermanDataset"
          - "LawSchoolGPADataset"
          - "MEPSDataset19"
          - "MEPSDataset20"
          - "MEPSDataset21"
      demoAlgorithm:
        description: '💡 Select Algorithm (preprocessing/inprocessing/postprocessing)'
        default: "Choose Algorithm"
        type: choice
        options:
          - "Choose Algorithm"
          - "Reweighing"
          - "DisparateImpactRemover"
          - "LFR"
          - "OptimPreproc"
          - "AdversarialDebiasing"
          - "ARTClassifier"
          - "GerryFairClassifier"
          - "MetaFairClassifier"
          - "PrejudiceRemover"
          - "ExponentiatedGradientReduction"
          - "GridSearchReduction"
          - "CalibratedEqOddsPostprocessing"
          - "EqOddsPostprocessing"
          - "RejectOptionClassification"
          - "DeterministicReranking"
      demoAlgoPath:
        # Trailing "\" removed: inside single quotes it was a literal backslash
        # appended to the rendered description text.
        description: '👣 Enter path of the file containing Algorithm. Ex: preprocessing.optim_preproc_helpers.data_preproc_functions'
        required: false
      dataset:
        description: '🗃️ Enter Dataset URL if you need any, except basic datasets'
        required: false
      algorithm:
        description: '⚙️ Enter Algorithm URL if you need any, except basic algorithms'
        required: false
      externalNotebook:
        description: '🗃️ Enter the URL if you need to execute any external notebook'
        required: false
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # Job: build the Python/R environment, fetch datasets, substitute the
  # user-supplied parameters into the selected notebook, execute it, and
  # publish the executed notebook plus its execution log as artifacts.
  build-py:
    # Specify the operating system for the job
    runs-on: ubuntu-latest
    # Define strategy for job execution
    strategy:
      fail-fast: false
      # Matrix strategy allows running multiple configurations
      matrix:
        # Quoted so YAML does not parse the version as the float 3.11
        python-version: ["3.11"]
    # Environment variables shared by every step of this job
    env:
      # URLs for UCI Machine Learning Repository and ProPublica GitHub
      UCI_DB: "https://archive.ics.uci.edu/ml/machine-learning-databases"
      PROPUBLICA_GH: "https://raw.githubusercontent.com/propublica/compas-analysis/bafff5da3f2e45eca6c2d5055faad269defd135a"
      # Access token for repository authentication
      REPO_KEY: ${{ secrets.ETHAI_AUDIT_HUB_GITHUB_TOKEN }}
      # Username used for commits made by the workflow
      username: github-actions
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - name: 📥 Check out repo
        uses: actions/checkout@v4
      - name: 📦 Set up R
        uses: r-lib/actions/setup-r@v2
      # Cache R packages (actions/cache@v2 is retired; v4 is required)
      - name: 💰 Cache R packages
        uses: actions/cache@v4
        with:
          path: ~/.local/lib/R/site-library
          key: ${{ runner.os }}-R-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-R-
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Cache pip downloads, keyed on requirements.txt
      - name: 💵 Cache Python packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      # Log every workflow input for traceability of the run
      - name: 📢 Echo Inputs
        run: |
          echo -e "\e[1;34mName:\e[0m ${{ github.event.inputs.name }}"
          echo -e "\e[1;34mNotebook:\e[0m ${{ github.event.inputs.notebook }}"
          echo -e "\e[1;34mDemo Metric 1:\e[0m ${{ github.event.inputs.demoMetric1 }}"
          echo -e "\e[1;34mDemo Metric 2:\e[0m ${{ github.event.inputs.demoMetric2 }}"
          echo -e "\e[1;34mDemo Dataset:\e[0m ${{ github.event.inputs.demoDataset }}"
          echo -e "\e[1;34mDemo Algorithm:\e[0m ${{ github.event.inputs.demoAlgorithm }}"
          echo -e "\e[1;34mDemo Algorithm Path:\e[0m ${{ github.event.inputs.demoAlgoPath }}"
          echo -e "\e[1;34mDataset URL:\e[0m ${{ github.event.inputs.dataset }}"
          echo -e "\e[1;34mAlgorithm URL:\e[0m ${{ github.event.inputs.algorithm }}"
          echo -e "\e[1;34mExternal Notebook URL:\e[0m ${{ github.event.inputs.externalNotebook }}"
      - name: 📦 Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -e '.[all]'
          pip install flake8
          pip list
          python -m rpy2.situation
      - name: 📊 Download basic datasets
        run: |
          wget ${UCI_DB}/adult/adult.data -P aif360/data/raw/adult/
          # Add more wget commands for other datasets
      # NOTE(review): user-supplied URLs are interpolated into the shell.
      # Quoting limits word-splitting, but the value is still attacker-chosen;
      # consider validating the URL before fetching.
      - name: 📊 Download extra dataset
        run: |
          if [ -n "${{ inputs.dataset }}" ]; then
            wget "${{ inputs.dataset }}"
          else
            echo "No dataset URL provided. Skipping download."
          fi
      - name: 📂 Unzip or untar the downloaded dataset
        if: ${{ inputs.dataset }}
        run: |
          mkdir -p temp_folder
          # Add commands to unzip or untar the dataset files
      - name: 📄 Copy CSV files to GitHub workspace
        run: |
          if [ -d temp_folder ]; then
            find temp_folder -type f -name "*.csv" -exec cp -r {} ${{ github.workspace }}/examples \;
            rm -rf temp_folder
          else
            echo "Error:No CSV file found."
          fi
      - name: 📗 Download External Notebook
        run: |
          if [ -n "${{ inputs.externalNotebook }}" ]; then
            wget "${{ inputs.externalNotebook }}"
          else
            echo "No external notebook URL provided. Skipping download."
          fi
      - name: ⚙️ Download External Algorithm
        run: |
          if [ -n "${{ inputs.algorithm }}" ]; then
            wget "${{ inputs.algorithm }}"
          else
            echo "No external algorithm URL provided. Skipping download."
          fi
      - name: 📋 List all the files in directory
        run: ls -a
      - name: 🧹 Lint with flake8
        run: |
          # Add commands for linting with flake8
      # NOTE(review): a `cd` inside one step does not persist; the next step
      # starts again at $GITHUB_WORKSPACE. This step only lists the directory.
      - name: 📒 Divert to Jupyter notebook directory
        run: |
          cd ${{ github.workspace }}/examples
          ls -a
      - name: 🚀 Execute Jupyter Notebook
        run: |
          # Navigate to examples directory
          cd ${{ github.workspace }}/examples
          # Get path
          pwd
          # Create an empty file for capturing the stack trace
          touch ipynb_execution.txt
          echo "Created file to save error logs"
          # Make sure the log file is writable (it already is after touch;
          # kept for parity with the original flow)
          chmod +w ipynb_execution.txt
          echo "Write access was given to ipynb_execution.txt"
          # NOTE(review): the notebook is executed only AFTER the sed
          # substitutions below — the previous extra execution of the raw
          # template (with un-substituted placeholders) was removed.
          if [ -n "${{ github.event.inputs.notebook }}" ]; then
            # NOTE(review): `notebook` is a choice input with a non-empty
            # default ("Choose a Notebook"), so this branch is always taken.
            chmod +w ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            echo "Read write access granted to ipynb file."
            # Replace ipynb file placeholder tokens with user inputs
            sed -i "s/Dataset/${{ github.event.inputs.demoDataset }}/g" ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            sed -i "s/Metric1/${{ github.event.inputs.demoMetric1 }}/g" ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            sed -i "s/Metric2/${{ github.event.inputs.demoMetric2 }}/g" ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            sed -i "s/Algorithm/${{ github.event.inputs.demoAlgorithm }}/g" ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            sed -i "s/Path/${{ github.event.inputs.demoAlgoPath }}/g" ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }}
            echo "Modified notebook according to user inputs"
            # Execute the Jupyter notebook and save the output to a new notebook
            jupyter nbconvert --to notebook --execute ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }} --output ${{ github.workspace }}/automation/${{ github.event.inputs.name }} 2>&1 | tee -a ipynb_execution.txt || { echo "error: failed to execute Jupyter notebook. Please see log files to see the stacktrace."; exit 1; }
          else
            echo "No specific notebook selected. Trying to download an external notebook..."
            if [ -n "${{ inputs.externalNotebook }}" ]; then
              wget "${{ inputs.externalNotebook }}"
              echo "External notebook was downloaded to the workflow"
              # Execute the downloaded notebook
              jupyter nbconvert --to notebook --execute ${{ github.event.inputs.notebook }} --output ${{ github.workspace }}/automation/${{ github.event.inputs.name }} 2>&1 | tee -a ipynb_execution.txt || { echo "error: failed to execute Jupyter notebook. Please see log files to see the stacktrace."; exit 1; }
            else
              echo "No external notebook URL provided. Skipping download."
            fi
          fi
      # upload-artifact@v2 was retired by GitHub; v4 is required
      - name: 📄 Log ipynb execution stack trace
        uses: actions/upload-artifact@v4
        with:
          name: ipynb-execution-stack-trace
          path: examples/ipynb_execution.txt
      - name: 📝 Commit updated notebook
        uses: EndBug/add-and-commit@v7
        with:
          author_name: Plot update bot
          message: "add: executed notebook"
          add: "${{ github.workspace }}/automation/${{ github.event.inputs.name }}"
      # NOTE(review): this step only configures git identity; it never runs
      # `git commit` or `git push` (the EndBug action above does the commit).
      - name: 🔀 Commit and Push Changes
        run: |
          git config --local user.name actions-user
          git config --local user.email "[email protected]"
      - name: 📂 Persist Logs of notebook execution log file
        if: ${{ always() }}
        run: |
          # Create artifacts directory
          mkdir -p ${{ github.workspace }}/automation/artifacts
          # Copy file to artifacts directory
          cp ${{ github.workspace }}/examples/ipynb_execution.txt ${{ github.workspace }}/automation/artifacts/
      - name: 📂 Persist Logs of executed ipynb file
        if: ${{ always() }}
        run: |
          # Copy file to artifacts directory
          cp ${{ github.workspace }}/automation/${{ github.event.inputs.name }} ${{ github.workspace }}/automation/artifacts/
      - name: 📤 Upload Artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          # Trailing space removed — artifact names must not end in whitespace
          name: "Audit Report ${{ github.actor }} - ${{ github.run_number }}"
          path: ${{ github.workspace }}/automation/artifacts
build-r:
runs-on: ubuntu-latest
# Define strategy for job execution
strategy:
fail-fast: false
# Matrix strategy allows running multiple configurations
matrix:
python-version: [3.11]
steps:
# Checkout the repository
- name: 📥 Check out repo
uses: actions/checkout@v3
# Set up R environment
- name: 📦 Set up R
uses: r-lib/actions/setup-r@v2
# Set up Python environment
- name: 🐍 Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
# Install R dependencies
- name: 📦 Install R dependencies
run: install.packages(c("reticulate", "rstudioapi", "testthat"))
shell: Rscript {0}
# Install Python dependencies
- name: 📦 Install Python dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install '.[all]'
# Install R package
- name: 📦 Install R package
run: R CMD INSTALL aif360/aif360-r