Skip to content

Commit

Permalink
Split data, reorganize code base and add slack messaging
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer committed Sep 16, 2024
1 parent e3f337a commit 3e218b6
Show file tree
Hide file tree
Showing 11 changed files with 617 additions and 255 deletions.
66 changes: 66 additions & 0 deletions .github/workflows/dtypes_benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Workflow: run the data-types benchmark on every push to main, then
# aggregate the per-version results and publish the report
# (Google Drive upload + Slack notification via tests.benchmark.utils).
name: Data Types Benchmark

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        # Quoted so YAML does not read 3.10 as the float 3.1.
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install invoke .[test]

      - name: Create folder and JSON file
        run: |
          mkdir -p results
          touch results/${{ matrix.python-version }}.json

      # Run the benchmarking
      - name: Benchmark Data Types
        env:
          PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
        run: |
          invoke benchmark-dtypes

      # Upload the CSV files as artifacts
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: results-${{ matrix.python-version }}
          path: results/*.csv

  upload:
    runs-on: ubuntu-latest
    needs: build

    steps:
      # Download the artifacts produced by every matrix build
      - name: Download artifacts
        uses: actions/download-artifact@v3
        with:
          path: results/

      # Generate the report
      - name: Generate the report
        env:
          PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
        run: python -m tests.benchmark.utils
90 changes: 54 additions & 36 deletions .github/workflows/release_notes.yml
Original file line number Diff line number Diff line change
@@ -1,52 +1,70 @@
name: Release Notes Generator
name: Data Types Benchmark

on:
workflow_dispatch:
inputs:
branch:
description: 'Branch to merge release notes into.'
run_tests:
description: 'Run integration and unit tests'
required: true
default: 'main'
version:
description:
'Version to use for the release. Must be in format: X.Y.Z.'
date:
description:
'Date of the release. Must be in format YYYY-MM-DD.'
type: boolean
default: true

jobs:
releasenotesgeneration:
build:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v5
- name: Checkout code
uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: '3.10'
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install requests==2.31.0
python -m pip install --upgrade pip
python -m pip install invoke .[test]
- name: Create folder and JSON file
run: |
mkdir -p results
touch results/${{ matrix.python-version }}.json
- name: Generate release notes
# Run the benchmarking
- name: Benchmark Data Types
env:
GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
run: >
python scripts/release_notes_generator.py
-v ${{ inputs.version }}
-d ${{ inputs.date }}
- name: Create pull request
id: cpr
uses: peter-evans/create-pull-request@v4
PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
run: |
invoke benchmark-dtypes
# Upload the CSV files as artifacts
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
token: ${{ secrets.GH_ACCESS_TOKEN }}
commit-message: Release notes for v${{ inputs.version }}
author: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
title: v${{ inputs.version }} Release Notes
body: "This is an auto-generated PR to update the release notes."
branch: release-notes
branch-suffix: short-commit-hash
base: ${{ inputs.branch }}
name: results-${{ matrix.python-version }}
path: results/*.csv

upload:
runs-on: ubuntu-latest
needs: build

steps:
# Download the artifacts
- name: Download artifacts
uses: actions/download-artifact@v3
with:
path: results/

# Generate the report
- name: Generate the report
env:
PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}

run: python -m tests.benchmark.utils
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ test = [
'tomli>=2.0.0,<3',
'pydrive',
'pyarrow',
'gitpython',
'slack-sdk>=3.23,<4.0',
]
pomegranate = ['pomegranate>=0.14.3,<0.15']
dev = [
Expand Down
34 changes: 32 additions & 2 deletions tests/_external/gdrive_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import os
import pathlib
import tempfile
from functools import lru_cache
from datetime import date

import git
import pandas as pd
import yaml
from pydrive.auth import GoogleAuth
Expand All @@ -15,6 +16,14 @@
PYDRIVE_CREDENTIALS = 'PYDRIVE_CREDENTIALS'


def _generate_filename():
    """Build an output filename from today's date and the current commit SHA.

    Returns:
        str:
            Filename of the form ``YYYY-MM-DD-<hexsha>.xlsx``.
    """
    head_sha = git.Repo(search_parent_directories=True).head.object.hexsha
    return '{}-{}.xlsx'.format(date.today(), head_sha)


def _get_drive_client():
tmp_credentials = os.getenv(PYDRIVE_CREDENTIALS)
if not tmp_credentials:
Expand Down Expand Up @@ -47,7 +56,24 @@ def _get_drive_client():
return GoogleDrive(gauth)


@lru_cache()
def get_latest_file(folder_id):
    """Get the latest file from the given Google Drive folder.

    Args:
        folder_id (str):
            The string Google Drive folder ID.

    Returns:
        pydrive.files.GoogleDriveFile or None:
            The most recently modified, non-trashed file in the folder,
            or ``None`` when the folder is empty.
    """
    drive = _get_drive_client()
    # Drive query-language booleans are lowercase: 'trashed=False' is an
    # invalid query and makes the API call fail.
    drive_query = drive.ListFile({
        'q': f"'{folder_id}' in parents and trashed=false",
        'orderBy': 'modifiedDate desc',
        'maxResults': 1
    })
    file_list = drive_query.GetList()
    if len(file_list) > 0:
        return file_list[0]

    return None


def read_excel(file_id):
"""Read a file as an XLSX from Google Drive.
Expand Down Expand Up @@ -96,7 +122,11 @@ def save_to_gdrive(output_folder, results, output_filename=None):
str:
Google drive file id of uploaded file.
"""
if not output_filename:
output_filename = _generate_filename()

output = io.BytesIO()

with pd.ExcelWriter(output, engine='xlsxwriter') as writer: # pylint: disable=E0110
for sheet_name, data in results.items():
data.to_excel(writer, sheet_name=sheet_name, index=False)
Expand Down
42 changes: 42 additions & 0 deletions tests/_external/slack_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Utility functions for Slack integration."""

import os

from slack_sdk import WebClient


def _get_slack_client():
    """Create an authenticated Slack client.

    Reads the token from the ``SLACK_TOKEN`` environment variable.

    Returns:
        WebClient:
            An authenticated Slack WebClient instance.
    """
    return WebClient(token=os.getenv('SLACK_TOKEN'))


def post_slack_message(channel, text):
    """Post a message to a Slack channel.

    Args:
        channel (str):
            The name of the channel to post to.
        text (str):
            The message to send to the channel.

    Returns:
        SlackResponse:
            Response from the Slack API call.

    Raises:
        RuntimeError:
            If the Slack API response reports the message was not posted.
    """
    client = _get_slack_client()
    response = client.chat_postMessage(channel=channel, text=text)
    if not response['ok']:
        error = response.get('error', 'unknown_error')
        # Fixed typo in the error message ('occured' -> 'occurred').
        msg = f'{error} occurred trying to post message to {channel}'
        raise RuntimeError(msg)

    return response
87 changes: 87 additions & 0 deletions tests/benchmark/numpy_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Representative single-column DataFrames for every benchmarked NumPy dtype."""

import numpy as np
import pandas as pd

# Maps a dtype label to a one-column DataFrame holding three representative
# values of that dtype (typical, negative/secondary and near-boundary).
# NOTE: ``np.string_`` and ``np.unicode_`` were removed in NumPy 2.0; the
# canonical aliases ``np.bytes_`` and ``np.str_`` are the same objects on
# NumPy < 2, so this stays backward compatible.
NUMPY_DTYPES = {
    'np.int8': pd.DataFrame({
        'np.int8': pd.Series([np.int8(1), np.int8(-1), np.int8(127)], dtype='int8')
    }),
    'np.int16': pd.DataFrame({
        'np.int16': pd.Series([np.int16(2), np.int16(-2), np.int16(32767)], dtype='int16')
    }),
    'np.int32': pd.DataFrame({
        'np.int32': pd.Series([np.int32(3), np.int32(-3), np.int32(2147483647)], dtype='int32')
    }),
    'np.int64': pd.DataFrame({
        'np.int64': pd.Series([np.int64(4), np.int64(-4), np.int64(922)], dtype='int64')
    }),
    'np.uint8': pd.DataFrame({
        'np.uint8': pd.Series([np.uint8(5), np.uint8(10), np.uint8(255)], dtype='uint8')
    }),
    'np.uint16': pd.DataFrame({
        'np.uint16': pd.Series([np.uint16(6), np.uint16(20), np.uint16(65535)], dtype='uint16')
    }),
    'np.uint32': pd.DataFrame({
        'np.uint32': pd.Series([np.uint32(7), np.uint32(30), np.uint32(42)], dtype='uint32')
    }),
    'np.uint64': pd.DataFrame({
        'np.uint64': pd.Series([np.uint64(8), np.uint64(40), np.uint64(184467)], dtype='uint64')
    }),
    'np.float16': pd.DataFrame({
        'np.float16': pd.Series(
            [np.float16(9.1), np.float16(-9.1), np.float16(65.0)], dtype='float16'
        )
    }),
    'np.float32': pd.DataFrame({
        'np.float32': pd.Series(
            [np.float32(1.2), np.float32(-1.2), np.float32(3.40)], dtype='float32'
        )
    }),
    'np.float64': pd.DataFrame({
        'np.float64': pd.Series(
            [np.float64(1.3), np.float64(-11.3), np.float64(1.7)], dtype='float64'
        )
    }),
    'np.complex64': pd.DataFrame({
        'np.complex64': pd.Series(
            [np.complex64(12 + 1j), np.complex64(-12 - 1j), np.complex64(3.4e38 + 1j)],
            dtype='complex64',
        )
    }),
    'np.complex128': pd.DataFrame({
        'np.complex128': pd.Series(
            [np.complex128(13 + 2j), np.complex128(-13 - 2j), np.complex128(1.7e308 + 2j)],
            dtype='complex128',
        )
    }),
    'np.bool': pd.DataFrame({
        'np.bool': pd.Series([np.bool_(True), np.bool_(False), np.bool_(True)], dtype='bool')
    }),
    'np.object': pd.DataFrame({
        'np.object': pd.Series(['object1', 'object2', 'object3'], dtype='object')
    }),
    'np.string': pd.DataFrame({
        'np.string': pd.Series([
            np.bytes_('string1'),
            np.bytes_('string2'),
            np.bytes_('string3'),
        ])
    }),
    'np.unicode': pd.DataFrame({
        'np.unicode': pd.Series(
            [np.str_('unicode1'), np.str_('unicode2'), np.str_('unicode3')],
            dtype='string',
        )
    }),
    'np.datetime64': pd.DataFrame({
        'np.datetime64': pd.Series([
            np.datetime64('2023-01-01T00:00:00'),
            np.datetime64('2024-01-01T00:00:00'),
            np.datetime64('2025-01-01T00:00:00'),
        ])
    }),
    'np.timedelta64': pd.DataFrame({
        'np.timedelta64': pd.Series(
            [np.timedelta64(1, 'D'), np.timedelta64(2, 'h'), np.timedelta64(3, 'm')],
        )
    }),
}
Loading

0 comments on commit 3e218b6

Please sign in to comment.