Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
9bow committed Jul 23, 2023
0 parents commit 2587362
Show file tree
Hide file tree
Showing 198 changed files with 53,423 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .actions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Scripts for generating notebooks.

**GHA here**

- generate notebooks
- workflow to ban any added notebook in a PR (fails if there are changes in `.notebooks`)

**PL side**

- git submodule with these examples
- GHA cron job to update the submodule head
713 changes: 713 additions & 0 deletions .actions/assistant.py

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions .actions/git-diff-sync.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# Compare a source branch ($1) against a target branch ($2) and produce the
# folder lists consumed by the CI matrix steps:
#   changed-folders.txt - notebook folders touched by the diff
#   dropped-folders.txt - folders removed/moved and to be pruned
# Usage: bash .actions/git-diff-sync.sh <source-branch> <target-branch>

set -e
# Pass dynamic values as printf arguments (%s), never inside the format
# string, so a '%' in a branch name cannot corrupt the output.
printf 'Detect changes for: %s >> %s\n\n' "$1" "$2"

# branch alias: replace '/' with '_' so it is usable in a file name
b1="${1//'/'/'_'}"
printf 'Branch alias: %s\n' "$b1"
# list all dirs in source branch
python .actions/assistant.py list_dirs > "dirs-$b1.txt"
cat "dirs-$b1.txt"

# diff the working tree against the target branch head (quoted against
# word splitting / globbing)
head=$(git rev-parse "origin/$2")
git diff --name-only "$head" --output=target-diff.txt
printf '\nRaw changes:\n'
cat target-diff.txt
# transfer the source CLI version
mkdir -p _TEMP
cp -r .actions/ _TEMP/.actions/

git checkout "$2"
b2="${2//'/'/'_'}"
printf 'Branch alias: %s\n' "$b2"
# recover the original CLI
#rm -rf .actions && mv _TEMP/.actions .actions
# list all dirs in target branch
python _TEMP/.actions/assistant.py list_dirs ".notebooks" --include_file_ext=".ipynb" > "dirs-$b2.txt"
cat "dirs-$b2.txt"

printf '\n\n'
git merge --ff -s resolve "origin/$1"

python _TEMP/.actions/assistant.py group-folders target-diff.txt --fpath_actual_dirs "['dirs-$b1.txt', 'dirs-$b2.txt']"
printf '\n\nChanged folders:\n'
cat changed-folders.txt
printf '\n\nDropped folders:\n'
cat dropped-folders.txt
printf '\n'
6 changes: 6 additions & 0 deletions .actions/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fire
tqdm
PyYAML
wcmatch
requests
pip
30 changes: 30 additions & 0 deletions .actions/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os

import pytest
from assistant import AssistantCLI

_PATH_ROOT = os.path.dirname(os.path.dirname(__file__))
_PATH_TEMPLATES = os.path.join(_PATH_ROOT, "templates")
_PATH_DIR_SIMPLE = os.path.join(_PATH_TEMPLATES, "simple")
_PATH_DIR_TITANIC = os.path.join(_PATH_TEMPLATES, "titanic")


def _path_in_dir(fname: str, folder: str = _PATH_ROOT) -> str:
return os.path.join(folder, fname)


@pytest.mark.parametrize(
    "cmd,args",
    [
        ("list_dirs", []),
        ("list_dirs", [".", ".ipynb"]),
        ("bash_render", [_PATH_DIR_SIMPLE]),
        ("bash_test", [_PATH_DIR_SIMPLE]),
        ("group_folders", [_path_in_dir("master-diff.txt"), _path_in_dir("dirs-b1.txt"), _path_in_dir("dirs-b2.txt")]),
        ("convert_ipynb", [_PATH_DIR_SIMPLE]),
        ("copy_notebooks", [_PATH_ROOT]),
        ("update_env_details", [_PATH_DIR_SIMPLE]),
    ],
)
def test_assistant_commands(cmd: str, args: list):
    """Smoke-test each AssistantCLI command by name with representative arguments.

    Each parametrized case only checks that the command runs without raising;
    no return values are asserted.
    """
    # getattr() is the idiomatic dynamic-attribute lookup; calling
    # __getattribute__ directly is equivalent here but non-standard.
    getattr(AssistantCLI(), cmd)(*args)
172 changes: 172 additions & 0 deletions .azure/ipynb-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# CI triggers: run only on pushes to the default branch, never on PRs.
trigger:
  # this shall process all these workflows in sequence even several PRs are merged shortly
  batch: "true"
  # publish notebooks only from default/main branch
  branches:
    include: [ main ]

# no run on PR as this is exclusive for publishing notebooks
pr: none

jobs:

# First job: sync the publication branch with the source branch and prune
# notebooks whose folders were dropped; emits the change matrix for the
# downstream 'papermill' job via the 'mtrx.dirs' output variable.
- job: sync_pub
  pool:
    vmImage: 'Ubuntu-20.04'
  variables:
    ACCELERATOR: CPU,GPU
    PUB_BRANCH: publication
    # first line of the triggering commit's message
    COMMIT_MSG: $(echo "$(Build.SourceVersionMessage)" | head -n 1)
    COMMIT_HASH: "$(Build.SourceVersion)"
  steps:
  - bash: |
      git config --global user.email "[email protected]"
      git config --global user.name "Azure Pipelines"
      printf "commit hash:\n $(COMMIT_HASH)\n"
      printf "commit message:\n $(COMMIT_MSG)\n"
    displayName: 'Set Git user'
  - bash: |
      set -e
      git fetch --all
      echo $(PUB_BRANCH)
      git ls-remote --heads origin ${PUB_BRANCH} | grep ${PUB_BRANCH} >/dev/null
      # NOTE(review): under 'set -e' a failing grep aborts the step before this
      # check runs, so the friendly message is unreachable — confirm intended.
      if [ "$?" == "1" ] ; then echo "Branch doesn't exist"; exit; fi
    displayName: 'Git branch check'
  - bash: pip install -r .actions/requires.txt
    displayName: 'Install dependencies'
  - bash: |
      # strip the 'refs/heads/' prefix to get the plain branch name
      current_branch=$(cut -d '/' -f3- <<< $(Build.SourceBranch))
      printf "$current_branch\n"
      bash .actions/git-diff-sync.sh $current_branch $(PUB_BRANCH)
    displayName: 'Compare changes & sync'
  - bash: |
      notebooks=$(python .actions/assistant.py generate-matrix changed-folders.txt)
      printf "Changed notebooks: $notebooks\n"
      # expose the matrix as an output variable for the dependent job
      echo "##vso[task.setVariable variable=dirs;isOutput=true]$notebooks"
    name: mtrx
    displayName: 'Changed matrix'
  - bash: |
      # remove notebooks which have moved
      while IFS= read -r line; do
        git rm .notebooks/$line.ipynb
        git rm .notebooks/$line.yaml
      done <<< $(cat dropped-folders.txt)
      git status
      git commit -m "prune: $(COMMIT_HASH)"
    # NOTE(review): no step visible here sets a 'dropped.folders' variable
    # (only the dropped-folders.txt file exists) — verify this condition can
    # ever evaluate to true.
    condition: gt(variables['dropped.folders'], 0)
    displayName: 'Prune notebook'
  - bash: |
      git status
      git push https://$(PAT_GHOST)@github.com/Lightning-AI/tutorials.git $(PUB_BRANCH)
    displayName: 'Finish push'
# Second job: render each changed notebook with papermill on a GPU agent and
# push the result to the publication branch, one notebook per matrix entry.
- job: papermill
  dependsOn: sync_pub
  strategy:
    # generated matrix with changed notebooks, include fields: "notebook", "agent-pool" and "docker-image"
    matrix: $[ dependencies.sync_pub.outputs['mtrx.dirs'] ]
    # Maximum number of jobs running in parallel, use 1 to run in sequence and reduce collisions
    maxParallel: "1"
  # how much time to give 'run always even if cancelled tasks' before stopping them
  cancelTimeoutInMinutes: "2"
  # how long to run the job before automatically cancelling
  # When 0 is specified, the maximum limit is used:
  # - For 360 minutes (6 hours) on Microsoft-hosted agents with a public project and public repository
  # - For 60 minutes on Microsoft-hosted agents with a private project or private repository
  timeoutInMinutes: "180"

  pool: "$(agent-pool)"
  # this need to have installed docker in the base machine/image...
  container:
    image: "$(docker-image)"
    options: "--gpus=all --shm-size=32g -v /usr/bin/docker:/tmp/docker:ro"

  variables:
    ACCELERATOR: CPU,GPU
    PUB_BRANCH: publication
    PATH_DATASETS: "$(Build.Repository.LocalPath)/.datasets"
    COMMIT_MSG: $(echo "$(Build.SourceVersionMessage)" | head -n 1)
    COMMIT_HASH: "$(Build.SourceVersion)"
    # take the suffix of the agent name after the last '_' as the device index
    DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )

  # skip the job entirely when no notebook changed
  condition: ne(dependencies.sync_pub.outputs['mtrx.dirs'], '')

  steps:
  - bash: |
      echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
      echo "##vso[task.setvariable variable=CONTAINER_ID]$(head -1 /proc/self/cgroup|cut -d/ -f3)"
    displayName: 'Set environment variables'
  - bash: |
      lspci | egrep 'VGA|3D'
      whereis nvidia
      nvidia-smi
      echo $CUDA_VISIBLE_DEVICES
      echo $CONTAINER_ID
      python --version
      pip list
    displayName: 'Image info & NVIDIA'
  - script: |
      /tmp/docker exec -t -u 0 $CONTAINER_ID \
        sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
    displayName: 'Install Sudo in container (thanks Microsoft!)'
  - bash: |
      git config --global user.email "[email protected]"
      git config --global user.name "Azure Pipelines"
      printf "commit hash:\n $(COMMIT_HASH)\n"
      printf "commit message:\n $(COMMIT_MSG)\n"
    displayName: 'Set Git user'
  - bash: |
      set -e
      git fetch --all
      echo $(PUB_BRANCH)
      git ls-remote --heads origin ${PUB_BRANCH} | grep ${PUB_BRANCH} >/dev/null
      # NOTE(review): with 'set -e' the "$?" check below is unreachable when
      # grep fails — confirm intended.
      if [ "$?" == "1" ] ; then echo "Branch doesn't exist"; exit; fi
      git checkout $(PUB_BRANCH)
      git show-ref $(PUB_BRANCH)
      git pull
    displayName: 'Git check & switch branch'
  - bash: |
      set -e
      sudo apt-get update -q --fix-missing
      sudo apt install -y tree ffmpeg
      #pip install --upgrade pip
      #pip --version
      pip install -r requirements.txt -r _requirements/data.txt
      pip list
    displayName: 'Install dependencies'
  - bash: |
      set -e
      # fail fast when the container sees no CUDA device
      python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu > 0, f'GPU: {mgpu}'"
      python -m papermill --version
    displayName: 'Sanity check'
  - bash: python .actions/assistant.py convert-ipynb $(notebook)
    displayName: 'Generate notebook'

  - bash: |
      set -e
      mkdir $(PATH_DATASETS)
      python .actions/assistant.py bash-render $(notebook)
      cat .actions/_ipynb-render.sh
      bash .actions/_ipynb-render.sh
      git status
      git commit -m "publish [GPU]: $(notebook)"
    env:
      # Kaggle credentials for notebooks that download competition datasets
      KAGGLE_USERNAME: $(KAGGLE_USERNAME)
      KAGGLE_KEY: $(KAGGLE_KEY)
    displayName: 'Render notebook'
  - bash: |
      git status
      git show-ref $(PUB_BRANCH)
      git push https://$(PAT_GHOST)@github.com/Lightning-AI/tutorials.git $(PUB_BRANCH)
    displayName: 'Finish push'
106 changes: 106 additions & 0 deletions .azure/ipynb-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# PR-only validation pipeline: no push trigger, runs on PRs targeting main.
trigger: none
pr:
  branches:
    include: [ main ]
  autoCancel: "true"
  drafts: "true"

# Multi-job configuration
# - https://learn.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#multi-job-configuration

jobs:

# First job: compute which notebook folders the PR changes and emit them as
# the matrix for the downstream 'nbval' job.
- job: check_diff
  pool:
    vmImage: 'Ubuntu-20.04'
  steps:
  - bash: |
      pip install -r .actions/requires.txt
      pip list
    displayName: 'Install dependencies'
  - bash: |
      head=$(git rev-parse origin/main)
      printf "Head: $head\n"
      git diff --name-only $head --output=target-diff.txt
      python .actions/assistant.py group-folders --fpath_gitdiff=target-diff.txt
      printf "Changed folders:\n"
      cat changed-folders.txt
    displayName: 'Process folders'
  - bash: |
      notebooks=$(python .actions/assistant.py generate-matrix changed-folders.txt)
      printf "Changed notebooks: $notebooks\n"
      # expose the matrix as an output variable for the dependent job
      echo "##vso[task.setVariable variable=dirs;isOutput=true]$notebooks"
    name: mtrx
    displayName: 'Changed matrix'
# Second job: execute/validate each changed notebook on a GPU agent,
# one notebook per matrix entry produced by 'check_diff'.
- job: nbval
  dependsOn: check_diff
  strategy:
    matrix: $[ dependencies.check_diff.outputs['mtrx.dirs'] ]
  # how long to run the job before automatically cancelling
  timeoutInMinutes: "95"
  # how much time to give 'run always even if cancelled tasks' before stopping them
  cancelTimeoutInMinutes: "2"

  pool: "$(agent-pool)"
  # this need to have installed docker in the base image...
  container:
    image: "$(docker-image)"
    options: "--gpus=all --shm-size=32g -v /usr/bin/docker:/tmp/docker:ro"

  variables:
    ACCELERATOR: CPU,GPU
    PATH_DATASETS: "$(Build.Repository.LocalPath)/.datasets"
    # take the suffix of the agent name after the last '_' as the device index
    DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )

  # skip the job entirely when no notebook changed
  condition: ne(dependencies.check_diff.outputs['mtrx.dirs'], '')

  steps:

  - bash: |
      echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
      echo "##vso[task.setvariable variable=CONTAINER_ID]$(head -1 /proc/self/cgroup|cut -d/ -f3)"
    displayName: 'Set environment variables'
  - bash: |
      lspci | egrep 'VGA|3D'
      whereis nvidia
      nvidia-smi
      echo $CUDA_VISIBLE_DEVICES
      echo $CONTAINER_ID
      python --version
      pip list | grep torch
    displayName: 'Image info & NVIDIA'
  - script: |
      /tmp/docker exec -t -u 0 $CONTAINER_ID \
        sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
    displayName: 'Install Sudo in container (thanks Microsoft!)'
  - bash: |
      set -e
      sudo apt-get update -q --fix-missing
      sudo apt install -y tree ffmpeg
      pip install -r requirements.txt -r _requirements/data.txt
      pip list
    displayName: 'Install dependencies'
  - bash: |
      # fail fast when the container sees no CUDA device
      python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu > 0, f'GPU: {mgpu}'"
    displayName: 'Sanity check'
  - bash: python .actions/assistant.py convert-ipynb $(notebook)
    displayName: 'Generate notebook'

  - bash: |
      set -e
      mkdir $(PATH_DATASETS)
      python .actions/assistant.py bash-test $(notebook)
      cat .actions/_ipynb-test.sh
      bash .actions/_ipynb-test.sh
    env:
      # Kaggle credentials for notebooks that download competition datasets
      KAGGLE_USERNAME: $(KAGGLE_USERNAME)
      KAGGLE_KEY: $(KAGGLE_KEY)
    displayName: 'PyTest notebook'
Loading

0 comments on commit 2587362

Please sign in to comment.