👨🔬 EthAIAuditHub Experiment Workflow #23
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: 👨🔬 EthAIAuditHub Experiment Workflow

# Manually triggered workflow: every value below is collected from the
# "Run workflow" form and read by the jobs through the `inputs` /
# `github.event.inputs` contexts.
on:
  workflow_dispatch:
    inputs:
      # File name given to the executed notebook.
      name:
        description: '📗 Enter a name for your notebook (ex: output_notebook.ipynb )'
        required: true
        type: string
      # Notebook to execute; the first option is the "nothing selected" sentinel.
      notebook:
        description: "📒 Select the notebook you need to run."
        default: "ChooseNotebook.ipynb"
        type: choice
        options:
          - "ChooseNotebook.ipynb"
          - "ethaiaudithubTest.ipynb"
          - "ethaiaudithubExtended.ipynb"
      demoMetric1:
        description: '📐 Select Metric1'
        default: "ChooseMetric1"
        type: choice
        options:
          - "ChooseMetric1"
          - "DatasetMetric"
          - "BinaryLabelDatasetMetric"
          - "ClassificationMetric"
          - "SampleDistortionMetric"
          - "MDSSClassificationMetric"
      demoMetric2:
        description: '📐 Select if you need to test with a second metric'
        default: "ChooseMetric2"
        type: choice
        options:
          - "ChooseMetric2"
          - "DatasetMetric"
          - "BinaryLabelDatasetMetric"
          - "ClassificationMetric"
          - "SampleDistortionMetric"
          - "MDSSClassificationMetric"
      demoDataset:
        description: '📊 Select Dataset'
        default: "ChooseDataset"
        type: choice
        options:
          - "ChooseDataset"
          - "AdultDataset"
          - "BankDataset"
          - "CompasDataset"
          - "GermanDataset"
          - "LawSchoolGPADataset"
          - "MEPSDataset19"
          - "MEPSDataset20"
          - "MEPSDataset21"
      demoAlgorithm:
        description: '💡 Select Algorithm (preprocessing/inprocessing/postprocessing)'
        default: "ChooseAlgorithm"
        type: choice
        options:
          - "ChooseAlgorithm"
          - "Reweighing"
          - "DisparateImpactRemover"
          - "LFR"
          - "OptimPreproc"
          - "AdversarialDebiasing"
          - "ARTClassifier"
          - "GerryFairClassifier"
          - "MetaFairClassifier"
          - "PrejudiceRemover"
          - "ExponentiatedGradientReduction"
          - "GridSearchReduction"
          - "CalibratedEqOddsPostprocessing"
          - "EqOddsPostprocessing"
          - "RejectOptionClassification"
          - "DeterministicReranking"
      # Free-text module path. The description used to end in a stray backslash,
      # which is literal inside single quotes and therefore showed up in the form.
      demoAlgoPath:
        description: '👣 Enter path of the file containing Algorithm.Ex:preprocessing.optim_preproc_helpers.data_preproc_functions'
        required: false
        type: string
      dataset:
        description: '🗃️ Enter Dataset URL if you need any, except basic datasets'
        required: false
        type: string
      algorithm:
        description: '⚙️ Enter Algorithm URL if you need any, except basic algorithms'
        required: false
        type: string
      externalNotebook:
        description: '🗃️ Enter the URL if you need to execute any external notebook'
        required: false
        type: string
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # Job to build Python environment
  build-py:
    # Specify the operating system for the job
    runs-on: ubuntu-latest
    # Define strategy for job execution
    strategy:
      fail-fast: false
      # Matrix strategy allows running multiple configurations
      matrix:
        # Quoted on purpose: an unquoted version is a YAML float, so a future
        # entry such as 3.10 would silently turn into 3.1.
        python-version: ["3.11"]
    # Define environment variables for the job
    env:
      # URLs for UCI Machine Learning Repository and ProPublica GitHub
      UCI_DB: "https://archive.ics.uci.edu/ml/machine-learning-databases"
      PROPUBLICA_GH: "https://raw.githubusercontent.com/propublica/compas-analysis/bafff5da3f2e45eca6c2d5055faad269defd135a"
      # Access token for repository authentication
      REPO_KEY: ${{ secrets.ETHAI_AUDIT_HUB_GITHUB_TOKEN }}
      # Define a username for GitHub Actions
      username: github-actions
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
# ── Steps: check out the repository and prepare the R / Python toolchains ──
      # Checks-out the repository under $GITHUB_WORKSPACE, so the job can access it
      - name: 📥 Check out repo
        uses: actions/checkout@v4
      - name: 📦 Set up R
        uses: r-lib/actions/setup-r@v2
      # Cache R packages, keyed on the DESCRIPTION files found in the repository.
      # actions/cache v1-v2 have been retired by GitHub, hence the v4 pin.
      - name: 💰 Cache R packages
        uses: actions/cache@v4
        with:
          path: ~/.local/lib/R/site-library
          key: ${{ runner.os }}-R-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-R-
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Cache Python packages (pip download cache), keyed on requirements.txt files
      - name: 💵 Cache Python packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
# ── Step: print the form inputs to the job log ──
      - name: 📢 Echo Inputs
        # Inputs reach the script as environment variables rather than being
        # expanded into the script text, so a value containing quotes, `$(...)`
        # or backticks is printed instead of executed.
        env:
          OUTPUT_NAME: ${{ github.event.inputs.name }}
          NOTEBOOK: ${{ github.event.inputs.notebook }}
          DEMO_METRIC1: ${{ github.event.inputs.demoMetric1 }}
          DEMO_METRIC2: ${{ github.event.inputs.demoMetric2 }}
          DEMO_DATASET: ${{ github.event.inputs.demoDataset }}
          DEMO_ALGORITHM: ${{ github.event.inputs.demoAlgorithm }}
          DEMO_ALGO_PATH: ${{ github.event.inputs.demoAlgoPath }}
          DATASET_URL: ${{ github.event.inputs.dataset }}
          ALGORITHM_URL: ${{ github.event.inputs.algorithm }}
          EXTERNAL_NOTEBOOK_URL: ${{ github.event.inputs.externalNotebook }}
        run: |
          # Bold-blue label followed by the value; printf's %s keeps backslashes
          # in the value literal (echo -e would interpret them).
          show() { printf '\e[1;34m%s\e[0m %s\n' "$1" "$2"; }
          show "Name:" "${OUTPUT_NAME}"
          show "Notebook:" "${NOTEBOOK}"
          show "Demo Metric 1:" "${DEMO_METRIC1}"
          show "Demo Metric 2:" "${DEMO_METRIC2}"
          show "Demo Dataset:" "${DEMO_DATASET}"
          show "Demo Algorithm:" "${DEMO_ALGORITHM}"
          show "Demo Algorithm Path:" "${DEMO_ALGO_PATH}"
          show "Dataset URL:" "${DATASET_URL}"
          show "Algorithm URL:" "${ALGORITHM_URL}"
          show "External Notebook URL:" "${EXTERNAL_NOTEBOOK_URL}"
# ── Steps: install the Python package and fetch the bundled raw dataset ──
      - name: 📦 Install dependencies
        run: |
          # Refresh the packaging toolchain before installing the project
          python -m pip install --upgrade pip setuptools wheel
          # Editable install of this repository with the "all" extra
          pip install -e '.[all]'
          pip install flake8
          # Record the resolved environment and run rpy2's diagnostic module
          pip list
          python -m rpy2.situation
      - name: 📊 Download basic datasets
        run: |
          # UCI_DB comes from the job-level env block
          wget --directory-prefix=aif360/data/raw/adult/ "${UCI_DB}/adult/adult.data"
          # Add more wget commands for other datasets
# ── Steps: optional user-supplied dataset (download → extract → copy CSV files) ──
      - name: 📊 Download extra dataset
        env:
          # Passed through the environment so the URL is never spliced into the script text
          DATASET_URL: ${{ inputs.dataset }}
        run: |
          if [ -n "${DATASET_URL}" ]; then
            # Dedicated directory: the extraction step must only see this download,
            # not archives that other steps leave in the workspace root.
            mkdir -p dataset_download
            wget --directory-prefix=dataset_download -- "${DATASET_URL}"
          else
            echo "No dataset URL provided. Skipping download."
          fi
      - name: 📂 Unzip or untar the downloaded dataset
        if: ${{ inputs.dataset }}
        run: |
          mkdir -p temp_folder
          # nullglob: a pattern without matches expands to nothing, so the array
          # length tells us whether an archive of that type was downloaded.
          shopt -s nullglob
          zip_files=(dataset_download/*.zip)
          tar_files=(dataset_download/*.tar.gz)
          if [ "${#zip_files[@]}" -gt 0 ]; then
            for archive in "${zip_files[@]}"; do
              unzip "${archive}" -d temp_folder
            done
          elif [ "${#tar_files[@]}" -gt 0 ]; then
            for archive in "${tar_files[@]}"; do
              tar -xzvf "${archive}" -C temp_folder
            done
          else
            echo "No zip or tar.gz file found. Skipping extraction."
          fi
          rm -rf dataset_download
      - name: 📄 Copy CSV files to GitHub workspace
        run: |
          # temp_folder only exists when the extraction step above ran
          if [ -d temp_folder ]; then
            find temp_folder -type f -name "*.csv" -exec cp -r {} "${GITHUB_WORKSPACE}/examples" \;
            rm -rf temp_folder
          else
            echo "Error:No CSV file found."
          fi
# ── Steps: optional user-supplied algorithm (download → extract → copy .py files) ──
      - name: ⚙️ Download External Algorithm
        env:
          # Passed through the environment so the URL is never spliced into the script text
          ALGORITHM_URL: ${{ inputs.algorithm }}
        run: |
          if [ -n "${ALGORITHM_URL}" ]; then
            # Dedicated directory: keeps this archive apart from any other
            # *.zip / *.tar.gz sitting in the workspace root.
            mkdir -p algorithm_download
            wget --directory-prefix=algorithm_download -- "${ALGORITHM_URL}"
          else
            echo "No external algorithm URL provided. Skipping download."
          fi
      - name: 📂 Unzip or untar the downloaded external algorithm
        if: ${{ inputs.algorithm }}
        run: |
          mkdir -p temp_folder1
          # nullglob: a pattern without matches expands to nothing, so the array
          # length tells us whether an archive of that type was downloaded.
          shopt -s nullglob
          zip_files=(algorithm_download/*.zip)
          tar_files=(algorithm_download/*.tar.gz)
          if [ "${#zip_files[@]}" -gt 0 ]; then
            for archive in "${zip_files[@]}"; do
              unzip "${archive}" -d temp_folder1
            done
          elif [ "${#tar_files[@]}" -gt 0 ]; then
            for archive in "${tar_files[@]}"; do
              tar -xzvf "${archive}" -C temp_folder1
            done
          else
            echo "No zip or tar.gz file found. Skipping extraction."
          fi
          rm -rf algorithm_download
      - name: 📄 Copy .py files containing algorithm to GitHub workspace
        run: |
          # temp_folder1 only exists when the extraction step above ran
          if [ -d temp_folder1 ]; then
            find temp_folder1 -type f -name "*.py" -exec cp -r {} "${GITHUB_WORKSPACE}/examples" \;
            rm -rf temp_folder1
          else
            echo "Error:No external python file found."
          fi
# ── Steps: workspace listing, lint placeholder, examples listing ──
      - name: 📋 List all the files in directory
        run: |
          ls -a
      # Placeholder: the script body holds only a shell comment, so nothing is linted yet
      - name: 🧹 Lint with flake8
        run: |
          # Add commands for linting with flake8
      # Lists the examples directory; the working directory applies to this step only
      - name: 📒 Divert to Jupyter notebook directory
        working-directory: ${{ github.workspace }}/examples
        run: |
          ls -a
# ── Step: run the selected (or externally supplied) notebook through nbconvert ──
      - name: 🚀 Execute Jupyter Notebook
        # Naming the shell explicitly makes the runner start bash with `-eo pipefail`.
        # Without pipefail the `nbconvert | tee` pipeline reports tee's exit status,
        # so a failed notebook run would never reach the `|| exit 1` handler.
        shell: bash
        # Form inputs are handed over as environment variables so they are never
        # expanded into the script text.
        env:
          NOTEBOOK: ${{ github.event.inputs.notebook }}
          OUTPUT_NAME: ${{ github.event.inputs.name }}
          DEMO_DATASET: ${{ github.event.inputs.demoDataset }}
          DEMO_METRIC1: ${{ github.event.inputs.demoMetric1 }}
          DEMO_METRIC2: ${{ github.event.inputs.demoMetric2 }}
          DEMO_ALGORITHM: ${{ github.event.inputs.demoAlgorithm }}
          DEMO_ALGO_PATH: ${{ github.event.inputs.demoAlgoPath }}
          EXTERNAL_NOTEBOOK_URL: ${{ inputs.externalNotebook }}
        run: |
          # Navigate to examples directory
          cd "${GITHUB_WORKSPACE}/examples"
          # Get path
          pwd

          automation_dir="${GITHUB_WORKSPACE}/automation"
          mkdir -p "${automation_dir}"

          # Create an empty file for capturing the stack trace
          touch ipynb_execution.txt
          echo "Created file to save error logs"
          # Give write access to .txt (the log is data, so no execute bit)
          chmod u+w ipynb_execution.txt
          echo "Write access was given to ipynb_execution.txt"

          # Execute notebook $1, writing the result to automation/<name> and
          # appending stdout/stderr to the log; aborts the step on failure.
          run_notebook() {
            jupyter nbconvert --to notebook --execute "$1" --output "${automation_dir}/${OUTPUT_NAME}" 2>&1 | tee -a ipynb_execution.txt || { echo "error: failed to execute Jupyter notebook. Please see log files to see the stacktrace."; exit 1; }
          }

          if [ "${NOTEBOOK}" != "ChooseNotebook.ipynb" ]; then
            notebook_path="${GITHUB_WORKSPACE}/examples/${NOTEBOOK}"
            # Give read write permissions to edit ipynb file
            chmod u+rw "${notebook_path}"
            echo "Read write access granted to ipynb file."

            # Replace every occurrence of placeholder $1 in the notebook with $2.
            # `\`, `/` and `&` are escaped so the value is inserted literally
            # instead of being read as sed syntax.
            substitute() {
              local escaped
              escaped=$(printf '%s' "$2" | sed -e 's|[\\/&]|\\&|g')
              sed -i "s/$1/${escaped}/g" "${notebook_path}"
            }
            # Replace ipynb file inputs with user inputs
            substitute "Dataset" "${DEMO_DATASET}"
            substitute "Metric1" "${DEMO_METRIC1}"
            substitute "Metric2" "${DEMO_METRIC2}"
            substitute "Algorithm" "${DEMO_ALGORITHM}"
            substitute "Path" "${DEMO_ALGO_PATH}"
            echo "Modified notebook according to user inputs"

            # Execute the Jupyter notebook and save the output to a new notebook
            run_notebook "${notebook_path}"
          else
            echo "No specific notebook selected. Trying to download an external notebook..."
            if [ -n "${EXTERNAL_NOTEBOOK_URL}" ]; then
              # Download into a scratch directory so only this download is inspected
              download_dir="$(mktemp -d)"
              wget --directory-prefix="${download_dir}" -- "${EXTERNAL_NOTEBOOK_URL}"
              echo "External notebook was downloaded to the workflow"

              mkdir -p temp_folder2
              # nullglob: unmatched patterns expand to nothing, so array length
              # tells us which archive type (if any) was downloaded.
              shopt -s nullglob
              zip_files=("${download_dir}"/*.zip)
              tar_files=("${download_dir}"/*.tar.gz)
              if [ "${#zip_files[@]}" -gt 0 ]; then
                for archive in "${zip_files[@]}"; do
                  unzip "${archive}" -d temp_folder2
                done
              elif [ "${#tar_files[@]}" -gt 0 ]; then
                for archive in "${tar_files[@]}"; do
                  tar -xzvf "${archive}" -C temp_folder2
                done
              else
                echo "No zip or tar.gz file found. Skipping extraction."
              fi

              # Take the first notebook found, either extracted from the archive
              # or downloaded directly as an .ipynb file.
              external_nb="$(find temp_folder2 "${download_dir}" -type f -name '*.ipynb' -print -quit)"
              if [ -n "${external_nb}" ]; then
                # Copy the notebook under a fixed name
                cp "${external_nb}" "${automation_dir}/externalNotebook.ipynb"
                rm -rf temp_folder2 "${download_dir}"
                echo "External notebook file renamed to externalNotebook.ipynb"
                # Execute the renamed notebook
                run_notebook "${automation_dir}/externalNotebook.ipynb"
              else
                rm -rf temp_folder2 "${download_dir}"
                echo "Error: No external ipynb file found."
                exit 1
              fi
            else
              echo "No external notebook URL provided or check URL again. Skipping download."
            fi
          fi
# ── Step: publish the nbconvert log as a workflow artifact ──
      - name: 📄 Log ipynb execution stack trace
        # Run even when the notebook step failed: that is when the log matters most.
        if: ${{ always() }}
        # upload-artifact v1/v2 are no longer served by GitHub, hence the v4 pin.
        uses: actions/upload-artifact@v4
        with:
          name: ipynb-execution-stack-trace
          # The execute step creates the log after changing into examples/, while
          # this action resolves relative paths from the workspace root.
          path: examples/ipynb_execution.txt
# ── Steps: commit the executed notebook and set the local git identity ──
      - name: 📝 Commit updated notebook
        uses: EndBug/add-and-commit@v7
        with:
          # Only the executed notebook under automation/ is staged
          add: '${{ github.workspace }}/automation/${{ github.event.inputs.name }}'
          author_name: 'Plot update bot'
          message: 'add: executed notebook'
      # Add a new step to commit and push the changes
      # NOTE(review): this step only sets the repository-local git identity; it runs
      # no commit or push command itself. The e-mail value looks like a web-page
      # e-mail-protection placeholder — confirm the intended address.
      - name: 🔀 Commit and Push Changes
        run: |
          git config --local user.name actions-user
          git config --local user.email "[email protected]"
# ── Steps: gather the log and the executed notebook, then publish them together ──
      - name: 📂 Persist Logs of notebook execution log file
        if: ${{ always() }}
        run: |
          # Create artifacts directory
          mkdir -p "${GITHUB_WORKSPACE}/automation/artifacts"
          # Copy file to artifacts directory
          cp "${GITHUB_WORKSPACE}/examples/ipynb_execution.txt" "${GITHUB_WORKSPACE}/automation/artifacts/"
      - name: 📂 Persist Logs of executed ipynb file
        if: ${{ always() }}
        env:
          # Handed over via the environment and quoted below, so a name containing
          # spaces or shell metacharacters is treated as a plain file name.
          OUTPUT_NAME: ${{ github.event.inputs.name }}
        run: |
          mkdir -p "${GITHUB_WORKSPACE}/automation/artifacts"
          # Copy file to artifacts directory
          cp "${GITHUB_WORKSPACE}/automation/${OUTPUT_NAME}" "${GITHUB_WORKSPACE}/automation/artifacts/"
      - name: 📤 Upload Artifacts
        if: always()
        # upload-artifact v1/v2 are no longer served by GitHub, hence the v4 pin.
        uses: actions/upload-artifact@v4
        with:
          name: "Audit Report ${{ github.actor }} - ${{ github.run_number }} "
          path: ${{ github.workspace }}/automation/artifacts
# ── Job: install the R package on top of the Python package ──
  build-r:
    runs-on: ubuntu-latest
    # Define strategy for job execution
    strategy:
      fail-fast: false
      # Matrix strategy allows running multiple configurations
      matrix:
        # Quoted on purpose: an unquoted version is a YAML float, so a future
        # entry such as 3.10 would silently turn into 3.1.
        python-version: ["3.11"]
    steps:
      # Checkout the repository
      - name: 📥 Check out repo
        uses: actions/checkout@v4
      # Set up R environment
      - name: 📦 Set up R
        uses: r-lib/actions/setup-r@v2
      # Set up Python environment
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Install R dependencies; the script body is R code because of the Rscript shell
      - name: 📦 Install R dependencies
        run: install.packages(c("reticulate", "rstudioapi", "testthat"))
        shell: Rscript {0}
      # Install Python dependencies
      - name: 📦 Install Python dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install '.[all]'
      # Install R package
      - name: 📦 Install R package
        run: R CMD INSTALL aif360/aif360-r