Skip to content

pipeline

pipeline #181

Workflow file for this run

# Workflow "pipeline": started by a weekly cron schedule or by a manual
# dispatch that can override the default inputs below.
name: pipeline
on:
  schedule:
    # 04:00 every Tuesday (GitHub evaluates cron schedules in UTC).
    - cron: '0 4 * * TUE'
  workflow_dispatch:
    inputs:
      message:
        description: "Commit message for the 'add-and-commit' step"
        default: '🤖 updated dataset files'
        required: false
      acquire_args:
        description: Arguments to be passed to the acquiring script
        default: '--asset all --seasons 2023'
        required: false
jobs:
  # Installs the project with Poetry, then runs the `acquire_cloud` make
  # target with AWS credentials taken from repository secrets.
  data-pipeline:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step executes in a bash login shell.
        shell: bash -l {0}
    steps:
      - name: checkout
        # v4 replaces v2, which runs on a Node.js runtime that GitHub has
        # deprecated for actions; default usage is unchanged.
        uses: actions/checkout@v4
      - name: Get branch name
        id: branch-name
        uses: tj-actions/branch-names@v7
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          # The "run acquire" step activates `.venv` from the working
          # directory, so the virtualenv has to be created in-project.
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Install library
        run: poetry install --no-interaction
      - name: run acquire
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: eu-west-1
          BRANCH: ${{ steps.branch-name.outputs.current_branch }}
          # Passed through the environment rather than interpolated into the
          # script, so the user-supplied value is never parsed as shell code.
          ARGS: ${{ github.event.inputs.acquire_args }}
        run: |
          # trigger prep job in aws batch

          # ARGS is empty on scheduled runs (there are no dispatch inputs),
          # so fall back to the same default the `acquire_args` input declares.
          DEFAULT_ARGS="--asset all --seasons 2023"
          EFFECTIVE_ARGS="${ARGS:-${DEFAULT_ARGS}}"
          DAY=$(date +"%Y-%m-%d")

          # Only master uses the master job definition; every other branch
          # (or an empty BRANCH) uses the dev one. The expansion is quoted so
          # an empty value compares as a string instead of breaking `[`.
          if [ "$BRANCH" = master ]; then
            JOB_DEFINITION_NAME=transfermarkt-datasets-batch-job-definition-master
          else
            JOB_DEFINITION_NAME=transfermarkt-datasets-batch-job-definition-dev
          fi

          source .venv/bin/activate

          # NOTE(review): MESSAGE is hard-coded here and the `message`
          # workflow input is not referenced in this step - confirm whether
          # that input is still consumed elsewhere.
          make \
            acquire_cloud \
            JOB_DEFINITION_NAME="$JOB_DEFINITION_NAME" \
            JOB_NAME="on-schedule-$DAY" \
            BRANCH="$BRANCH" \
            ARGS="$EFFECTIVE_ARGS" \
            MESSAGE="🤖 updated raw dataset files"