-
Notifications
You must be signed in to change notification settings - Fork 0
215 lines (188 loc) · 8.04 KB
/
bias-mitigation-automation.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
---
name: ⚖ Bias Mitigation Automation

# Controls when the action will run.
# Manual trigger only: the operator picks a notebook, names the output, and may
# supply extra dataset/algorithm URLs.
on:
  workflow_dispatch:
    inputs:
      name:
        description: '📗 Enter a name for your notebook (ex: output_notebook.ipynb )'
        required: true
      notebook:
        description: "📒 Select the notebook you need to run."
        default: "ethaiaudithub.ipynb"
        type: choice
        options:
          - "ethaiaudithub.ipynb"
          - "ethical_bias_mitigation.ipynb"
          - "tutorial_bias_advertising.ipynb"
          - "tutorial_medical_expenditure.ipynb"
          - "demo_reweighing_preproc.ipynb"
          - "demo_short_gerryfair_test.ipynb"
          - "demo_reject_option_classification.ipynb"
          - "demo_new_features.ipynb"
          - "demo_ot_metric.ipynb"
          - "demo_optim_preproc_adult.ipynb"
          - "demo_optim_data_preproc.ipynb"
          - "demo_meta_classifier.ipynb"
          - "demo_mdss_detector.ipynb"
          - "demo_mdss_classifier_metric.ipynb"
          - "demo_lime.ipynb"
          - "demo_lfr.ipynb"
          - "demo_json_explainers.ipynb"
          - "demo_gerryfair.ipynb"
          - "demo_exponentiated_gradient_reduction.ipynb"
          - "demo_disparate_impact_remover.ipynb"
          - "demo_deterministic_reranking.ipynb"
          - "demo_calibrated_eqodds_postprocessing.ipynb"
          - "demo_adversarial_debiasing.ipynb"
          - "ethaiaudithubDemo.ipynb"
      dataset:
        description: '🗃️ Enter Dataset URL if you need any, except basic datasets'
        required: false
      algorithm:
        description: '⚙️ Enter Algorithm URL if you need any, except basic algorithms'
        required: false

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  build-py:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Quoted so the version is read as the string "3.11", never a float.
        python-version: ["3.11"]
    env:
      UCI_DB: "https://archive.ics.uci.edu/ml/machine-learning-databases"
      PROPUBLICA_GH: "https://raw.githubusercontent.com/propublica/compas-analysis/bafff5da3f2e45eca6c2d5055faad269defd135a"
      REPO_KEY: ${{ secrets.ETHAI_AUDIT_HUB_GITHUB_TOKEN }}
      username: github-actions
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - name: 📥 Check out repo
        uses: actions/checkout@v3
      - name: 📦 Set up R
        uses: r-lib/actions/setup-r@v2
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: 📢 Echo Inputs
        run: |
          echo -e "\e[1;34mNotebook Name:\e[0m ${{ github.event.inputs.name }}"
          echo -e "\e[1;34mExecuting Notebook:\e[0m ${{ github.event.inputs.notebook }}"
          echo -e "\e[1;34mDataset URL:\e[0m ${{ github.event.inputs.dataset }}"
          echo -e "\e[1;34mAlgorithm URL:\e[0m ${{ github.event.inputs.algorithm }}"
      - name: 📦 Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -e '.[all]'
          pip install flake8
          pip list
          python -m rpy2.situation
      - name: 📊 Download basic datasets
        run: |
          wget ${UCI_DB}/adult/adult.data -P aif360/data/raw/adult/
          wget ${UCI_DB}/adult/adult.test -P aif360/data/raw/adult/
          wget ${UCI_DB}/adult/adult.names -P aif360/data/raw/adult/
          wget ${UCI_DB}/statlog/german/german.data -P aif360/data/raw/german/
          wget ${UCI_DB}/statlog/german/german.doc -P aif360/data/raw/german/
          wget ${PROPUBLICA_GH}/compas-scores-two-years.csv -P aif360/data/raw/compas/
          wget ${UCI_DB}/00222/bank-additional.zip -P aif360/data/raw/bank/ && unzip -j aif360/data/raw/bank/bank-additional.zip -d aif360/data/raw/bank/ && rm aif360/data/raw/bank/bank-additional.zip
          (cd aif360/data/raw/meps;Rscript generate_data.R <<< y)
      - name: 📊 Download extra datasets
        run: |
          if [ -n "${{ inputs.dataset }}" ]; then
            wget ${{ inputs.dataset }}
          else
            echo "No dataset URL provided. Skipping download."
          fi
      - name: 📂 Unzip or untar the downloaded dataset
        if: ${{ inputs.dataset }}
        run: |
          mkdir -p temp_folder
          # compgen -G is used instead of "[ -f *.zip ]": the test form errors
          # out ("binary operator expected") when the glob matches >1 file.
          if compgen -G "*.zip" > /dev/null; then
            unzip *.zip -d temp_folder
          elif compgen -G "*.tar.gz" > /dev/null; then
            tar -xzvf *.tar.gz -C temp_folder
          else
            echo "No zip or tar.gz file found. Skipping extraction."
          fi
      - name: 📄 Copy CSV files to github workspace
        run: |
          if [ -d temp_folder ]; then
            find temp_folder -type f -name "*.csv" -exec cp -r {} ${{ github.workspace }}/examples \;
            rm -rf temp_folder
          else
            echo "Error: No CSV file found."
          fi
      - name: 📋 List all the files in directory
        run: ls -a
      - name: 🧹Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      # NOTE(review): the `cd` below does not persist into later steps — each
      # `run:` starts in the workspace root. This step only lists the directory.
      - name: 📒 Divert to Jupyter notebook directory
        run: |
          cd ${{ github.workspace }}/examples
          ls -a
      - name: 🚀 Execute Jupyter Notebook
        run: |
          cd ${{ github.workspace }}/examples
          pwd
          # Execute the Jupyter notebook and save the output to a new notebook
          jupyter nbconvert --to notebook --execute ${{ github.workspace }}/examples/${{ github.event.inputs.notebook }} --output ${{ github.workspace }}/automation/${{ github.event.inputs.name }}
      - name: 📝 commit updated notebook
        uses: EndBug/add-and-commit@v7
        with:
          author_name: Plot update bot
          message: "Added executed notebook"
          add: "${{ github.workspace }}/automation/${{ github.event.inputs.name }}"
      # NOTE(review): this step only sets git identity; it never runs
      # `git commit` or `git push` (the previous action already commits).
      # Confirm whether it is still needed.
      - name: 🔀 Commit and Push Changes
        run: |
          git config --local user.name actions-user
          git config --local user.email "[email protected]"
      - name: 📂 Persist Logs
        if: always()
        run: |
          mkdir -p ${{ github.workspace }}/automation/artifacts
          cp ${{ github.workspace }}/automation/${{ github.event.inputs.name }} ${{ github.workspace }}/automation/artifacts/
      - name: 📤 Upload Artifacts
        # upload-artifact@v2 is deprecated and disabled by GitHub; v4 is the
        # supported release. Trailing space removed from the artifact name.
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: "Audit Report ${{ github.actor }} - ${{ github.run_number }}"
          path: ${{ github.workspace }}/automation/artifacts

  build-r:
    runs-on: ubuntu-latest
    # Define strategy for job execution
    strategy:
      fail-fast: false
      # Matrix strategy allows running multiple configurations
      matrix:
        python-version: ["3.11"]
    steps:
      # Checkout the repository
      - name: 📥 Check out repo
        uses: actions/checkout@v3
      # Set up R environment
      - name: 📦 Set up R
        uses: r-lib/actions/setup-r@v2
      # Set up Python environment
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      # Install R dependencies
      - name: 📦 Install R dependencies
        run: install.packages(c("reticulate", "rstudioapi", "testthat"))
        shell: Rscript {0}
      # Install Python dependencies
      - name: 📦 Install Python dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install '.[all]'
      # Install R package
      - name: 📦 Install R package
        run: R CMD INSTALL aif360/aif360-r