name: ⚖ Bias Mitigation Automation
# Controls when the action will run.
on:
workflow_dispatch:
inputs:
name:
description: '📗 Enter a name for your output notebook (e.g. output_notebook.ipynb)'
required: true
notebook:
description: "📒 Select the notebook you need to run."
default: "ethaiaudithub.ipynb"
type: choice
options:
- "ethaiaudithub.ipynb"
- "ethical_bias_mitigation.ipynb"
- "tutorial_bias_advertising.ipynb"
- "tutorial_medical_expenditure.ipynb"
- "demo_reweighing_preproc.ipynb"
- "demo_short_gerryfair_test.ipynb"
- "demo_reject_option_classification.ipynb"
- "demo_new_features.ipynb"
- "demo_ot_metric.ipynb"
- "demo_optim_preproc_adult.ipynb"
- "demo_optim_data_preproc.ipynb"
- "demo_meta_classifier.ipynb"
- "demo_mdss_detector.ipynb"
- "demo_mdss_classifier_metric.ipynb"
- "demo_lime.ipynb"
- "demo_lfr.ipynb"
- "demo_json_explainers.ipynb"
- "demo_gerryfair.ipynb"
- "demo_exponentiated_gradient_reduction.ipynb"
- "demo_disparate_impact_remover.ipynb"
- "demo_deterministic_reranking.ipynb"
- "demo_calibrated_eqodds_postprocessing.ipynb"
- "demo_adversarial_debiasing.ipynb"
dataset:
description: '🗃️ Enter a dataset URL if you need one (the basic datasets are downloaded automatically)'
required: false
algorithm:
description: '⚙️ Enter an algorithm URL if you need one (the basic algorithms are already included)'
required: false
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
build-py:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.11]
env:
UCI_DB: "https://archive.ics.uci.edu/ml/machine-learning-databases"
PROPUBLICA_GH: "https://raw.githubusercontent.com/propublica/compas-analysis/bafff5da3f2e45eca6c2d5055faad269defd135a"
REPO_KEY: ${{secrets.ETHAI_AUDIT_HUB_GITHUB_TOKEN}}
username: github-actions
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- name: Check out repo
uses: actions/checkout@v3
- name: Set up R
uses: r-lib/actions/setup-r@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install -e '.[all]'
pip install flake8
pip list
python -m rpy2.situation
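# Fetch the raw Adult, German credit, COMPAS, and Bank marketing data and generate the MEPS data under aif360/data/raw/.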
- name: Download basic datasets
run: |
wget ${UCI_DB}/adult/adult.data -P aif360/data/raw/adult/
wget ${UCI_DB}/adult/adult.test -P aif360/data/raw/adult/
wget ${UCI_DB}/adult/adult.names -P aif360/data/raw/adult/
wget ${UCI_DB}/statlog/german/german.data -P aif360/data/raw/german/
wget ${UCI_DB}/statlog/german/german.doc -P aif360/data/raw/german/
wget ${PROPUBLICA_GH}/compas-scores-two-years.csv -P aif360/data/raw/compas/
wget ${UCI_DB}/00222/bank-additional.zip -P aif360/data/raw/bank/ && unzip -j aif360/data/raw/bank/bank-additional.zip -d aif360/data/raw/bank/ && rm aif360/data/raw/bank/bank-additional.zip
(cd aif360/data/raw/meps;Rscript generate_data.R <<< y)
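# Download the user-supplied dataset from the workflow_dispatch input, if one was provided.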
- name: Download extra datasets
run: |
if [ -n "${{ inputs.dataset }}" ]; then
wget ${{ inputs.dataset }}
else
echo "No dataset URL provided. Skipping download."
fi
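# Extract a downloaded .zip or .tar.gz archive into a temporary folder.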
- name: Unzip or untar the downloaded dataset
if: ${{ inputs.dataset }}
run: |
mkdir -p temp_folder
# [ -f *.zip ] misbehaves when the glob matches zero or several files; test the glob with ls instead
if ls *.zip >/dev/null 2>&1; then
unzip *.zip -d temp_folder
elif ls *.tar.gz >/dev/null 2>&1; then
tar -xzvf *.tar.gz -C temp_folder
else
echo "No zip or tar.gz file found. Skipping extraction."
fi
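# Copy any extracted CSV files into examples/ (where the notebooks run) and remove the temporary folder.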
- name: Copy CSV files to GitHub workspace
run: |
if [ -d temp_folder ]; then
find temp_folder -type f -name "*.csv" -exec cp -r {} ${{ github.workspace }}/examples \;
rm -rf temp_folder
else
echo "Error:No CSV file found."
fi
- name: List all files in the directory
run: ls -a
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
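# Note: each run step starts in a fresh shell at the workspace root, so the cd below only applies within its own step.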
- name: Change to Jupyter notebook directory
run: |
cd ${{ github.workspace }}/examples
ls -a
- name: Execute Jupyter Notebook
run: |
cd ${{ github.workspace }}/examples
pwd
# Execute the Jupyter notebook and save the output to a new notebook
jupyter nbconvert --to notebook --execute ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }} --output ${{ github.workspace }}/automation/${{ github.event.inputs.name }}
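# Commit the executed notebook back to the repository using the add-and-commit action.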
- name: Commit updated notebook
uses: EndBug/add-and-commit@v7
with:
author_name: Plot update bot
message: "Added executed notebook"
add: "${{ github.workspace }}/automation/${{ github.event.inputs.name }}"
# Configure the Git identity used for any follow-up commits (the commit itself is handled by the step above)
- name: Configure Git User
run: |
git config --local user.name actions-user
git config --local user.email "[email protected]"
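# nbconvert's PDF export relies on XeLaTeX and Pandoc, so install both before converting.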
- name: Install TeX Live
run: sudo apt-get install -y texlive-xetex
- name: Install Pandoc
run: sudo apt-get install -y pandoc
- name: Convert Execution Log to PDF
run: |
cd ${{ github.workspace }}/automation
jupyter nbconvert --to pdf "${{ github.workspace }}/automation/${{ github.event.inputs.name }}" --output "${{ github.workspace }}/automation/${{ github.event.inputs.name }}_log.pdf"
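# Capture this run's log with the GitHub CLI and convert it to PDF; runs even if earlier steps fail (if: always()).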
- name: Generate Workflow Log PDF
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
cd ${{ github.workspace }}/automation
# Poll until gh can return the full log; it reports "still in progress" while the run is active
gh run view ${{ github.run_id }} --log > workflow_log.txt 2>&1 || true
while grep -q "still in progress" workflow_log.txt; do
sleep 10s
gh run view ${{ github.run_id }} --log > workflow_log.txt 2>&1 || true
done
pandoc workflow_log.txt -o workflow_log.pdf
if: always()
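# Publish the generated reports as a downloadable workflow artifact, even if earlier steps failed.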
- name: Upload Artifacts
uses: actions/upload-artifact@v3
if: always()
with:
name: "Audit Report ${{ github.actor }} - ${{ github.run_number }} "
path: ${{ github.workspace }}/automation/artifacts
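# A separate job that installs the aif360 Python package and the AIF360 R interface (aif360/aif360-r).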
build-r:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.11]
steps:
- name: Check out repo
uses: actions/checkout@v3
- name: Set up R
uses: r-lib/actions/setup-r@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
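# R packages needed to install and test the R interface; reticulate bridges R to the Python aif360 installation.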
- name: Install R dependencies
run: install.packages(c("reticulate", "rstudioapi", "testthat"))
shell: Rscript {0}
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install '.[all]'
- name: Install R package
run: R CMD INSTALL aif360/aif360-r