Skip to content

Commit

Permalink
Upgrade google analytics reporting to use the v1 data api
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna committed Oct 26, 2023
1 parent 54ce4e4 commit 9008dfd
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 53 deletions.
10 changes: 6 additions & 4 deletions isic/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,12 @@ def mutate_configuration(configuration: ComposedConfiguration) -> None:
ISIC_DATACITE_API_URL = values.Value("https://api.test.datacite.org")
ISIC_DATACITE_USERNAME = values.Value(None)
ISIC_DATACITE_PASSWORD = values.SecretValue(None)
ISIC_GOOGLE_ANALYTICS_VIEW_IDS = [
"183845203", # ISIC Gallery
"217814783", # ISIC Challenge 2020
"199577101", # ISIC Challenge
ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = [
"360152967", # ISIC Gallery
"368050084", # ISIC Challenge 2020
"360125792", # ISIC Challenge
"265191179", # ISIC API
"265233311", # ISDIS
]
# This is technically a secret, but it's unset in sandbox so we don't want to make
# it required.
Expand Down
77 changes: 30 additions & 47 deletions isic/stats/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@
from datetime import timedelta
import gzip
from io import BytesIO
import json
from types import SimpleNamespace
import urllib.parse

from apiclient.discovery import build
import boto3
from botocore.config import Config
from celery import shared_task
from celery.utils.log import get_task_logger
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from googleapiclient.errors import HttpError
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest
from more_itertools.more import chunked
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import pycountry

Expand All @@ -37,51 +35,36 @@ def _s3_client():


def _initialize_analyticsreporting():
credentials = ServiceAccountCredentials.from_json_keyfile_dict(
json.loads(settings.ISIC_GOOGLE_API_JSON_KEY), SCOPES
from google.oauth2 import service_account

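# NOTE(review): from_service_account_info() expects a parsed mapping, not a raw JSON string.
# The previous code decoded this setting first; confirm whether this is still needed:
# json_acct_info = json.loads(settings.ISIC_GOOGLE_API_JSON_KEY)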
credentials = service_account.Credentials.from_service_account_info(
settings.ISIC_GOOGLE_API_JSON_KEY
)
analytics = build("analyticsreporting", "v4", credentials=credentials)
return analytics

scoped_credentials = credentials.with_scopes(SCOPES)

return BetaAnalyticsDataClient(credentials=scoped_credentials)


def _get_google_analytics_report(analytics, view_id: str) -> dict:
def _get_google_analytics_report(client, property_id: str) -> dict:
results = {
"num_sessions": 0,
"sessions_per_country": defaultdict(int),
}
response = (
analytics.reports()
.batchGet(
body={
"reportRequests": [
{
"viewId": view_id,
"dateRanges": [{"startDate": "30daysAgo", "endDate": "today"}],
"metrics": [{"expression": "ga:sessions"}],
"dimensions": [{"name": "ga:countryIsoCode"}],
}
]
}
)
.execute()
)

for report in response.get("reports", []):
column_header = report.get("columnHeader", {})
metric_headers = column_header.get("metricHeader", {}).get("metricHeaderEntries", [])

for row in report.get("data", {}).get("rows", []):
dimensions = row.get("dimensions", [])
date_range_values = row.get("metrics", [])
request = RunReportRequest(
property=f"properties/{property_id}",
dimensions=[Dimension(name="countryId")],
metrics=[Metric(name="sessions")],
date_ranges=[DateRange(start_date="30daysAgo", end_date="today")],
)
response = client.run_report(request)

for _, values in enumerate(date_range_values):
for _, value in zip(metric_headers, values.get("values")):
if dimensions[0] != "ZZ": # unknown country
results["sessions_per_country"][dimensions[0]] += int(value)

results["num_sessions"] += int(
report.get("data", {}).get("totals", [{}])[0].get("values", ["0"])[0]
)
for row in response.rows:
country_id, sessions = row.dimension_values[0].value, row.metric_values[0].value
results["sessions_per_country"][country_id] += int(sessions)
results["num_sessions"] += int(sessions)

return results

Expand All @@ -106,9 +89,6 @@ def _country_from_iso_code(iso_code: str) -> dict:
@shared_task(
soft_time_limit=60,
time_limit=120,
# Figuring out retries within googleapiclient is a bit cumbersome, use celery.
autoretry_for=(HttpError,),
retry_backoff=True,
)
def collect_google_analytics_metrics_task():
if not settings.ISIC_GOOGLE_API_JSON_KEY:
Expand All @@ -117,19 +97,22 @@ def collect_google_analytics_metrics_task():
)
return

analytics = _initialize_analyticsreporting()
client = _initialize_analyticsreporting()
num_sessions = 0
sessions_per_country = []
sessions_per_iso_code: dict[str, int] = defaultdict(int)

for view_id in settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS:
results = _get_google_analytics_report(analytics, view_id)
for property_id in settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS:
results = _get_google_analytics_report(client, property_id)
num_sessions += results["num_sessions"]
for key, value in results["sessions_per_country"].items():
sessions_per_iso_code[key] += value

for iso_code, sessions in sessions_per_iso_code.items():
sessions_per_country.append({**{"sessions": sessions}, **_country_from_iso_code(iso_code)})
if iso_code != "(not set)":
sessions_per_country.append(
{**{"sessions": sessions}, **_country_from_iso_code(iso_code)}
)

GaMetrics.objects.create(
range_start=timezone.now() - timedelta(days=30),
Expand Down
2 changes: 1 addition & 1 deletion isic/stats/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
@pytest.mark.django_db
def test_collect_google_analytics_task(mocker, settings):
# only have one property ID, otherwise the counts will be multiplied
settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS = ["just_one"]
settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = ["just_one"]
settings.ISIC_GOOGLE_API_JSON_KEY = "something"

mocker.patch("isic.stats.tasks._initialize_analyticsreporting", mocker.MagicMock)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"django-spurl",
"django-storages>=1.14",
"django-widget-tweaks",
"google-api-python-client",
"google-analytics-data",
"hashids",
"isic-metadata>=0.2.0",
"jaro-winkler",
Expand Down

0 comments on commit 9008dfd

Please sign in to comment.