diff --git a/isic/settings.py b/isic/settings.py
index a64c9eeb..a36b3b8d 100644
--- a/isic/settings.py
+++ b/isic/settings.py
@@ -120,10 +120,12 @@ def mutate_configuration(configuration: ComposedConfiguration) -> None:
     ISIC_DATACITE_API_URL = values.Value("https://api.test.datacite.org")
     ISIC_DATACITE_USERNAME = values.Value(None)
     ISIC_DATACITE_PASSWORD = values.SecretValue(None)
-    ISIC_GOOGLE_ANALYTICS_VIEW_IDS = [
-        "183845203",  # ISIC Gallery
-        "217814783",  # ISIC Challenge 2020
-        "199577101",  # ISIC Challenge
+    ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = [
+        "360152967",  # ISIC Gallery
+        "368050084",  # ISIC Challenge 2020
+        "360125792",  # ISIC Challenge
+        "265191179",  # ISIC API
+        "265233311",  # ISDIS
     ]
     # This is technically a secret, but it's unset in sandbox so we don't want to make
     # it required.
diff --git a/isic/stats/tasks.py b/isic/stats/tasks.py
index c396c633..c4043ae9 100644
--- a/isic/stats/tasks.py
+++ b/isic/stats/tasks.py
@@ -7,7 +7,6 @@
 from types import SimpleNamespace
 import urllib.parse
 
-from apiclient.discovery import build
 import boto3
 from botocore.config import Config
 from celery import shared_task
@@ -15,9 +14,9 @@
 from django.conf import settings
 from django.db import transaction
 from django.utils import timezone
-from googleapiclient.errors import HttpError
+from google.analytics.data_v1beta import BetaAnalyticsDataClient
+from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest
 from more_itertools.more import chunked
-from oauth2client.service_account import ServiceAccountCredentials
 import pandas as pd
 import pycountry
 
@@ -37,51 +36,44 @@ def _s3_client():
 
 
 def _initialize_analyticsreporting():
-    credentials = ServiceAccountCredentials.from_json_keyfile_dict(
-        json.loads(settings.ISIC_GOOGLE_API_JSON_KEY), SCOPES
+    """Return a BetaAnalyticsDataClient authenticated with the configured service account."""
+    from google.oauth2 import service_account
+
+    # The setting holds the service account key as a JSON string, while
+    # from_service_account_info expects the already-parsed mapping, so decode it first.
+    credentials = service_account.Credentials.from_service_account_info(
+        json.loads(settings.ISIC_GOOGLE_API_JSON_KEY)
     )
-    analytics = build("analyticsreporting", "v4", credentials=credentials)
-    return analytics
+
+    scoped_credentials = credentials.with_scopes(SCOPES)
+
+    return BetaAnalyticsDataClient(credentials=scoped_credentials)
 
 
-def _get_google_analytics_report(analytics, view_id: str) -> dict:
+def _get_google_analytics_report(client, property_id: str) -> dict:
+    """
+    Fetch session counts over the last 30 days for a single Analytics property.
+
+    Returns a dict with "num_sessions" (the sum over every reported row) and
+    "sessions_per_country" (sessions keyed by the reported countryId value).
+    """
     results = {
         "num_sessions": 0,
         "sessions_per_country": defaultdict(int),
     }
-    response = (
-        analytics.reports()
-        .batchGet(
-            body={
-                "reportRequests": [
-                    {
-                        "viewId": view_id,
-                        "dateRanges": [{"startDate": "30daysAgo", "endDate": "today"}],
-                        "metrics": [{"expression": "ga:sessions"}],
-                        "dimensions": [{"name": "ga:countryIsoCode"}],
-                    }
-                ]
-            }
-        )
-        .execute()
-    )
 
-    for report in response.get("reports", []):
-        column_header = report.get("columnHeader", {})
-        metric_headers = column_header.get("metricHeader", {}).get("metricHeaderEntries", [])
-
-        for row in report.get("data", {}).get("rows", []):
-            dimensions = row.get("dimensions", [])
-            date_range_values = row.get("metrics", [])
+    request = RunReportRequest(
+        property=f"properties/{property_id}",
+        dimensions=[Dimension(name="countryId")],
+        metrics=[Metric(name="sessions")],
+        date_ranges=[DateRange(start_date="30daysAgo", end_date="today")],
+    )
+    response = client.run_report(request)
 
-            for _, values in enumerate(date_range_values):
-                for _, value in zip(metric_headers, values.get("values")):
-                    if dimensions[0] != "ZZ":  # unknown country
-                        results["sessions_per_country"][dimensions[0]] += int(value)
-
-        results["num_sessions"] += int(
-            report.get("data", {}).get("totals", [{}])[0].get("values", ["0"])[0]
-        )
+    for row in response.rows:
+        country_id, sessions = row.dimension_values[0].value, row.metric_values[0].value
+        results["sessions_per_country"][country_id] += int(sessions)
+        results["num_sessions"] += int(sessions)
 
     return results
 
@@ -106,9 +98,6 @@ def _country_from_iso_code(iso_code: str) -> dict:
 @shared_task(
     soft_time_limit=60,
     time_limit=120,
-    # Figuring out retries within googleapiclient is a bit cumbersome, use celery.
-    autoretry_for=(HttpError,),
-    retry_backoff=True,
 )
 def collect_google_analytics_metrics_task():
     if not settings.ISIC_GOOGLE_API_JSON_KEY:
@@ -117,19 +106,23 @@ def collect_google_analytics_metrics_task():
         )
         return
 
-    analytics = _initialize_analyticsreporting()
+    client = _initialize_analyticsreporting()
     num_sessions = 0
     sessions_per_country = []
     sessions_per_iso_code: dict[str, int] = defaultdict(int)
 
-    for view_id in settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS:
-        results = _get_google_analytics_report(analytics, view_id)
+    for property_id in settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS:
+        results = _get_google_analytics_report(client, property_id)
         num_sessions += results["num_sessions"]
         for key, value in results["sessions_per_country"].items():
             sessions_per_iso_code[key] += value
 
     for iso_code, sessions in sessions_per_iso_code.items():
-        sessions_per_country.append({**{"sessions": sessions}, **_country_from_iso_code(iso_code)})
+        # "(not set)" is what Analytics reports when a session has no country.
+        if iso_code != "(not set)":
+            sessions_per_country.append(
+                {**{"sessions": sessions}, **_country_from_iso_code(iso_code)}
+            )
 
     GaMetrics.objects.create(
         range_start=timezone.now() - timedelta(days=30),
diff --git a/isic/stats/tests/test_tasks.py b/isic/stats/tests/test_tasks.py
index 41502314..f7c1886f 100644
--- a/isic/stats/tests/test_tasks.py
+++ b/isic/stats/tests/test_tasks.py
@@ -20,7 +20,7 @@
 @pytest.mark.django_db
 def test_collect_google_analytics_task(mocker, settings):
-    # only have one VIEW_ID, otherwise the counts will be multiplied
-    settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS = ["just_one"]
+    # only have one PROPERTY_ID, otherwise the counts will be multiplied
+    settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = ["just_one"]
     settings.ISIC_GOOGLE_API_JSON_KEY = "something"
 
     mocker.patch("isic.stats.tasks._initialize_analyticsreporting", mocker.MagicMock)
diff --git a/setup.py b/setup.py
index 64c2a995..160a6ff6 100644
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,7 @@
         "django-spurl",
         "django-storages>=1.14",
         "django-widget-tweaks",
-        "google-api-python-client",
+        "google-analytics-data",
         "hashids",
         "isic-metadata>=0.2.0",
         "jaro-winkler",