From e34d3980add1812781b0c86daec3ccbd5eba2359 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Thu, 29 Aug 2024 14:01:14 -0700 Subject: [PATCH 1/3] Delete recordings w/ only bots or that are silent Instead of uploading recordings of meetings that only had bots (e.g. the Otter.ai notetaker) or that are completely silent (nobody turned on their mic; probably only one person showed up), delete those recordings from Zoom (we still want to save space) and do not upload them to YouTube (nobody needs to see empty meetings). This also includes some light refactor of a few things and better error handling around Zoom API calls. --- scripts/upload_zoom_recordings.py | 254 +++++++++++++++++++++--------- 1 file changed, 178 insertions(+), 76 deletions(-) diff --git a/scripts/upload_zoom_recordings.py b/scripts/upload_zoom_recordings.py index 565c8e5..62c0f77 100644 --- a/scripts/upload_zoom_recordings.py +++ b/scripts/upload_zoom_recordings.py @@ -29,8 +29,10 @@ import os import re import requests +import subprocess import sys import tempfile +from typing import Dict from urllib.parse import urlparse from zoomus import ZoomClient from lib.constants import USER_TYPES, VIDEO_CATEGORY_IDS @@ -41,48 +43,139 @@ ZOOM_CLIENT_SECRET = os.environ['EDGI_ZOOM_CLIENT_SECRET'] ZOOM_ACCOUNT_ID = os.environ['EDGI_ZOOM_ACCOUNT_ID'] -def is_truthy(x): return x.lower() in ['true', '1', 'y', 'yes'] -ZOOM_DELETE_AFTER_UPLOAD = is_truthy(os.environ.get('EDGI_ZOOM_DELETE_AFTER_UPLOAD', '')) -DRY_RUN = is_truthy(os.environ.get('EDGI_DRY_RUN', '')) - MEETINGS_TO_RECORD = ['EDGI Community Standup'] DEFAULT_YOUTUBE_PLAYLIST = 'Uploads from Zoom' DEFAULT_YOUTUBE_CATEGORY = 'Science & Technology' DEFAULT_VIDEO_LICENSE = 'creativeCommon' DO_FILTER = False -client = ZoomClient(ZOOM_CLIENT_ID, ZOOM_CLIENT_SECRET, ZOOM_ACCOUNT_ID) +# Ignore users with names that match these patterns when determining if a +# meeting has any participants and its recordings should be preserved. 
+ZOOM_IGNORE_USER_NAMES = ( + # The otter.ai notetaker bot is present in most meetings. + re.compile(r'Otter\.ai', re.I), +) + + +def is_truthy(x): + return x.lower() in ['true', '1', 'y', 'yes'] + + +ZOOM_DELETE_AFTER_UPLOAD = is_truthy(os.environ.get('EDGI_ZOOM_DELETE_AFTER_UPLOAD', '')) +DRY_RUN = is_truthy(os.environ.get('EDGI_DRY_RUN', '')) + + +class ZoomError(Exception): + def __init__(self, response, message=None): + try: + data = response.json() + except Exception: + data = {} + + if not message: + message = data.pop('message', 'Zoom API error!') -# Get main account, which should be 'pro' -pro_users = [user for user in client.user.list().json()['users'] if user['type'] >= USER_TYPES['pro'] ] -user_id = pro_users[0]['id'] + data['http_status'] = response.status_code + full_message = f'{message} ({data!r}) Check the docs for details: https://developers.zoom.us/docs/api/.' + super.__init__(full_message) -def fix_date(date_string): + @classmethod + def is_error(cls, response): + return response.status_code >= 400 + + @classmethod + def raise_if_error(cls, response, message=None): + if cls.is_error(response): + raise cls(response, message) + + @classmethod + def parse_or_raise(cls, response, message=None) -> Dict: + cls.raise_if_error(response, message) + return response.json() + + +def fix_date(date_string: str) -> str: date = date_string index = date.find('Z') date = date[:index] + '.0' + date[index:] return date -def pretty_date(date_string): + +def pretty_date(date_string: str) -> str: return datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%SZ').strftime('%b %-d, %Y') -def download_file(url, download_path, query=None): - r = requests.get(url, params=query, stream=True) + +def download_zoom_file(client: ZoomClient, url: str, download_directory: str) -> str: + # Note the token info in the client isn't really *public*, but it's + # not explicitly private, either. 
Use `config[]` syntax instead of + # `config.get()` so we get an exception if things have changed and + # this data is no longer available. + r = requests.get(url, stream=True, headers={ + 'Authorization': f'Bearer {client.config['token']}' + }) r.raise_for_status() resolved_url = r.url filename = urlparse(resolved_url).path.split('/')[-1] - filepath = os.path.join(download_path, filename) + filepath = os.path.join(download_directory, filename) if os.path.exists(filepath): r.close() return with open(filepath, 'wb') as f: for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks + if chunk: # filter out keep-alive new chunks f.write(chunk) return filepath + +def delete_zoom_recording_file(client: ZoomClient, file): + """ + Delete a single file from a meeting recording. + + This exists because zoomus only has built in support for deleting a whole + recording and all its files. However, we often want to delete a particular + file (e.g. delete the video, but leave the audio or chat transcript). + """ + response = client.meeting.delete_request(f'/meetings/{file["meeting_id"]}/recordings/{file["id"]}', params={'action': 'trash'}) + if response.status_code != 204: + raise ZoomError(response) + + +def meeting_had_no_participants(client: ZoomClient, meeting: Dict) -> bool: + participants = ZoomError.parse_or_raise(client.past_meeting.get_participants(meeting_id=meeting['uuid']))['participants'] + + return all( + any(p.search(u['name']) for p in ZOOM_IGNORE_USER_NAMES) + for u in participants + ) + + +def video_has_audio(file_path: str) -> bool: + """Detect whether a video file has a non-silent audio track.""" + result = subprocess.run([ + 'ffmpeg', + '-i', file_path, + # The `ebur128=peak` looks for the peak loudness level of the audio. + '-af', 'ebur128=peak=true', + '-f', 'null', + '-' + ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + # No audio track. 
+ if b'audio:0kib' in result.stdout.lower(): + return False + + # Silent audio. Note that this won't handle things like the low hiss of an + # empty room, which will report some low decibel level instead of `-inf`. + # In practice, this covers Zoom recordings where a mic was never turned on. + # Docs: https://ffmpeg.org/ffmpeg-filters.html#ebur128-1 + if re.search(rb'Peak:\s+-inf', result.stdout): + return False + + return True + + def main(): if DRY_RUN: print('⚠️ This is a dry run! Videos will not actually be uploaded.\n') @@ -93,96 +186,105 @@ def main(): print('Please use `python scripts/auth.py` to re-authorize.') return sys.exit(1) + zoom = ZoomClient(ZOOM_CLIENT_ID, ZOOM_CLIENT_SECRET, ZOOM_ACCOUNT_ID) + + # Get main account, which should be 'pro' + zoom_user_id = next(user['id'] for user in zoom.user.list().json()['users'] + if user['type'] >= USER_TYPES['pro']) + with tempfile.TemporaryDirectory() as tmpdirname: - print('Creating tmp dir: ' + tmpdirname) - meetings = client.recording.list(user_id=user_id).json()['meetings'] + print(f'Creating tmp dir: {tmpdirname}\n') + + meetings = ZoomError.parse_or_raise(zoom.recording.list(user_id=zoom_user_id))['meetings'] meetings = sorted(meetings, key=lambda m: m['start_time']) # Filter recordings less than 1 minute meetings = filter(lambda m: m['duration'] > 1, meetings) for meeting in meetings: - print(f'Processing meeting: {meeting["topic"]} from {meeting["start_time"]}') + print(f'Processing meeting: {meeting["topic"]} from {meeting["start_time"]} (ID: "{meeting['uuid']}")') + # 3. 
filter by criteria (no-op for now) if meeting['topic'] not in MEETINGS_TO_RECORD and DO_FILTER: - print(' Skipping...') + print(' Skipping: meeting not in topic list.') + continue + + if meeting_had_no_participants(zoom, meeting): + print(' Deleting recording: nobody attended this meeting.') + if not DRY_RUN: + response = zoom.recording.delete(meeting_id=meeting['uuid'], action='trash') + if response.status_code < 300: + print(' 🗑️ Deleted recording.') + else: + print(f' ❌ {ZoomError(response)}') continue videos = [file for file in meeting['recording_files'] if file['file_type'].lower() == 'mp4'] if len(videos) == 0: - print(f' No videos to upload: {meeting["topic"]}') + print(' 🔹 Skipping: no videos for meeting') continue elif any((file['file_size'] == 0 for file in videos)): - print(f' Meeting still processing: {meeting["topic"]}') + print(' 🔹 Skipping: meeting still processing') continue - print(' Recording is permitted for upload!') + print(f' {len(videos)} videos to upload...') for file in videos: url = file['download_url'] - print(f' Download from {url}...') - # Note the token info in the client isn't really *public*, but it's - # not explicitly private, either. Use `config[]` syntax instead of - # `config.get()` so we get an exception if things have changed and - # this data is no longer available. 
- filepath = download_file(url, - tmpdirname, - query={"access_token": client.config["token"]}) - - recording_date = fix_date(meeting['start_time']) - title = f'{meeting["topic"]} - {pretty_date(meeting["start_time"])}' - - print(f' Uploading {filepath}\n {title=}\n {recording_date=}') - if not DRY_RUN: - video_id = upload_video(youtube, - filepath, - title=title, - category=VIDEO_CATEGORY_IDS["Science & Technology"], - license=DEFAULT_VIDEO_LICENSE, - recording_date=recording_date, - privacy_status='unlisted') - - # Add all videos to default playlist - print(' Adding to main playlist: Uploads from Zoom') - if not DRY_RUN: - add_video_to_playlist(youtube, video_id, title=DEFAULT_YOUTUBE_PLAYLIST, privacy='unlisted') + print(f' Download from {url}...') + filepath = download_zoom_file(zoom, url, tmpdirname) + + if video_has_audio(filepath): + recording_date = fix_date(meeting['start_time']) + title = f'{meeting["topic"]} - {pretty_date(meeting["start_time"])}' + + print(f' Uploading {filepath}\n {title=}\n {recording_date=}') + if not DRY_RUN: + video_id = upload_video(youtube, + filepath, + title=title, + category=VIDEO_CATEGORY_IDS["Science & Technology"], + license=DEFAULT_VIDEO_LICENSE, + recording_date=recording_date, + privacy_status='unlisted') - # Add to additional playlists - playlist_name = '' - if any(x in meeting['topic'].lower() for x in ['web mon', 'website monitoring', 'wm']): - playlist_name = 'Website Monitoring' + # Add all videos to default playlist + print(' Adding to main playlist: Uploads from Zoom') + if not DRY_RUN: + add_video_to_playlist(youtube, video_id, title=DEFAULT_YOUTUBE_PLAYLIST, privacy='unlisted') - if 'data together' in meeting['topic'].lower(): - playlist_name = 'Data Together' + # Add to additional playlists + playlist_name = '' + if any(x in meeting['topic'].lower() for x in ['web mon', 'website monitoring', 'wm']): + playlist_name = 'Website Monitoring' - if 'community call' in meeting['topic'].lower(): - playlist_name = 
'Community Calls' + if 'data together' in meeting['topic'].lower(): + playlist_name = 'Data Together' - if 'edgi introductions' in meeting['topic'].lower(): - playlist_name = 'EDGI Introductions' + if 'community call' in meeting['topic'].lower(): + playlist_name = 'Community Calls' - if 'all-edgi' in meeting['topic'].lower(): - playlist_name = 'All-EDGI Meetings' + if 'edgi introductions' in meeting['topic'].lower(): + playlist_name = 'EDGI Introductions' - if playlist_name: - print(f' Adding to call playlist: {playlist_name}') - if not DRY_RUN: - add_video_to_playlist(youtube, video_id, title=playlist_name, privacy='unlisted') + if 'all-edgi' in meeting['topic'].lower(): + playlist_name = 'All-EDGI Meetings' + + if playlist_name: + print(f' Adding to call playlist: {playlist_name}') + if not DRY_RUN: + add_video_to_playlist(youtube, video_id, title=playlist_name, privacy='unlisted') + + # TODO: save the chat log transcript in a comment on the video. + else: + print(' Skipping upload: video was silent (no mics were on).') if ZOOM_DELETE_AFTER_UPLOAD and not DRY_RUN: # Just delete the video for now, since that takes the most storage space. - # We should save the chat log transcript in a comment on the video. - - # We're using the zoom api directly instead of zoomus, because zoomus only implements - # deleting all recorded files related to the meeting using the v2 API, - # while we still want to retain the audio and chat files for backup. - url = f'https://api.zoom.us/v2/meetings/{file["meeting_id"]}/recordings/{file["id"]}' - querystring = {"action":"trash"} - headers = {'authorization': f'Bearer {client.config["token"]}'} - response = requests.request("DELETE", url, headers=headers, params=querystring) - if response.status_code == 204: - print(f' Deleted {file["file_type"]} file from Zoom for recording: {meeting["topic"]}') - else: - print(f' The file could not be deleted. We received this response: {response.status_code}. 
Please check https://marketplace.zoom.us/docs/api-reference/zoom-api/cloud-recording/recordingdeleteone for what that could mean.') + try: + delete_zoom_recording_file(zoom, file) + print(f' 🗑️ Deleted {file["file_type"]} file from Zoom for recording: {meeting["topic"]}') + except ZoomError as error: + print(f' ❌ {error}') if __name__ == '__main__': From 754ba7a85f567ac927c93c016fa99260b54f635b Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Thu, 29 Aug 2024 14:09:24 -0700 Subject: [PATCH 2/3] Actually, zoomus *does* have single-file deletion --- scripts/upload_zoom_recordings.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/scripts/upload_zoom_recordings.py b/scripts/upload_zoom_recordings.py index 62c0f77..dce6261 100644 --- a/scripts/upload_zoom_recordings.py +++ b/scripts/upload_zoom_recordings.py @@ -129,19 +129,6 @@ def download_zoom_file(client: ZoomClient, url: str, download_directory: str) -> return filepath -def delete_zoom_recording_file(client: ZoomClient, file): - """ - Delete a single file from a meeting recording. - - This exists because zoomus only has built in support for deleting a whole - recording and all its files. However, we often want to delete a particular - file (e.g. delete the video, but leave the audio or chat transcript). - """ - response = client.meeting.delete_request(f'/meetings/{file["meeting_id"]}/recordings/{file["id"]}', params={'action': 'trash'}) - if response.status_code != 204: - raise ZoomError(response) - - def meeting_had_no_participants(client: ZoomClient, meeting: Dict) -> bool: participants = ZoomError.parse_or_raise(client.past_meeting.get_participants(meeting_id=meeting['uuid']))['participants'] @@ -280,11 +267,15 @@ def main(): if ZOOM_DELETE_AFTER_UPLOAD and not DRY_RUN: # Just delete the video for now, since that takes the most storage space. 
- try: - delete_zoom_recording_file(zoom, file) + response = zoom.recording.delete_single_recording( + meeting_id=file['meeting_id'], + recording_id=file['id'], + action='trash' + ) + if response.status_code == 204: print(f' 🗑️ Deleted {file["file_type"]} file from Zoom for recording: {meeting["topic"]}') - except ZoomError as error: - print(f' ❌ {error}') + else: + print(f' ❌ {ZoomError(response)}') if __name__ == '__main__': From 24cc9f97b8af3356bc40bc388ebba20821f91c17 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Thu, 29 Aug 2024 14:33:27 -0700 Subject: [PATCH 3/3] Get the main user in a better way --- scripts/lib/constants.py | 8 +++++++- scripts/upload_zoom_recordings.py | 7 +++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/lib/constants.py b/scripts/lib/constants.py index b9cffd6..6156acc 100644 --- a/scripts/lib/constants.py +++ b/scripts/lib/constants.py @@ -4,6 +4,12 @@ 'corp': 3, } +ZOOM_ROLES = { + 'owner': 0, + 'admin': 1, + 'member': 2, +} + VIDEO_CATEGORY_IDS = { 'Film & Animation': 1, 'Autos & Vehicles': 2, @@ -36,4 +42,4 @@ 'Shorts': 42, 'Shows': 43, 'Trailers': 44, -} \ No newline at end of file +} diff --git a/scripts/upload_zoom_recordings.py b/scripts/upload_zoom_recordings.py index dce6261..fc440f4 100644 --- a/scripts/upload_zoom_recordings.py +++ b/scripts/upload_zoom_recordings.py @@ -35,7 +35,7 @@ from typing import Dict from urllib.parse import urlparse from zoomus import ZoomClient -from lib.constants import USER_TYPES, VIDEO_CATEGORY_IDS +from lib.constants import VIDEO_CATEGORY_IDS, ZOOM_ROLES from lib.youtube import get_youtube_client, upload_video, add_video_to_playlist, validate_youtube_credentials YOUTUBE_CREDENTIALS_PATH = '.youtube-upload-credentials.json' @@ -175,9 +175,8 @@ def main(): zoom = ZoomClient(ZOOM_CLIENT_ID, ZOOM_CLIENT_SECRET, ZOOM_ACCOUNT_ID) - # Get main account, which should be 'pro' - zoom_user_id = next(user['id'] for user in zoom.user.list().json()['users'] - if user['type'] >= 
USER_TYPES['pro']) + # Official meeting recordings we will upload belong to the account owner. + zoom_user_id = zoom.user.list(role_id=ZOOM_ROLES['owner']).json()['users'][0]['id'] with tempfile.TemporaryDirectory() as tmpdirname: print(f'Creating tmp dir: {tmpdirname}\n')