
Scrape data #242

Workflow file for this run

# This is a basic workflow to help you get started with Actions
name: Scrape data

# Controls when the action will run.
on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    - cron: '15 2 * * *'
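    # i.e. every day at 02:15; GitHub Actions evaluates schedules in UTC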
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2
      - name: setup requirements
        run: pip install -r requirements.txt
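        # no setup-python step, so this presumably relies on the Python
        # preinstalled on the ubuntu-latest runner image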
      - name: scrape data
        run: make all

      - name: upload to s3
        env:
          S3BUCKET: ${{ secrets.S3BUCKET }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
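          # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are the standard
          # credential variables read by the AWS CLI and boto3, so the upload
          # target presumably picks them up from the environment (the Makefile
          # itself is not shown in this run)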
        run: make upload-to-s3
      - name: Trigger Openness Project ETL
        uses: peter-evans/repository-dispatch@v3
        with:
          token: ${{ secrets.PAT }}
          repository: datamade/openness-project-nmid
          event-type: nightly-scrape-done
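
The final step uses peter-evans/repository-dispatch to raise a repository_dispatch event named nightly-scrape-done in datamade/openness-project-nmid, so that repository can start its ETL as soon as the scrape finishes. The PAT secret must be a personal access token with access to the target repository; the default GITHUB_TOKEN cannot dispatch events to a different repository. For context, here is a minimal sketch of a workflow that would consume the event on the receiving side. Only the event type and repository name come from the file above; everything else, including the file path and the make target, is an assumption.

# Hypothetical receiver, e.g. .github/workflows/etl.yml in
# datamade/openness-project-nmid; `make import` is a placeholder, since the
# real ETL command is not shown anywhere in this run.
name: Openness Project ETL

on:
  repository_dispatch:
    # Matches the event-type sent by the scraper's final step
    types: [nightly-scrape-done]
  # Allow manual runs too
  workflow_dispatch:

jobs:
  etl:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: run ETL
        run: make import  # hypothetical target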