Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete recordings w/ only bots or that are silent #73

Merged
merged 3 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion scripts/lib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
'corp': 3,
}

# Maps Zoom account role names to the numeric IDs passed as the `role_id`
# filter when listing users (see scripts/upload_zoom_recordings.py).
# NOTE(review): the values are presumably Zoom's built-in role IDs — confirm
# against the Zoom API documentation.
ZOOM_ROLES = {
'owner': 0,
'admin': 1,
'member': 2,
}

VIDEO_CATEGORY_IDS = {
'Film & Animation': 1,
'Autos & Vehicles': 2,
Expand Down Expand Up @@ -36,4 +42,4 @@
'Shorts': 42,
'Shows': 43,
'Trailers': 44,
}
}
242 changes: 167 additions & 75 deletions scripts/upload_zoom_recordings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,60 +29,140 @@
import os
import re
import requests
import subprocess
import sys
import tempfile
from typing import Dict
from urllib.parse import urlparse
from zoomus import ZoomClient
from lib.constants import USER_TYPES, VIDEO_CATEGORY_IDS
from lib.constants import VIDEO_CATEGORY_IDS, ZOOM_ROLES
from lib.youtube import get_youtube_client, upload_video, add_video_to_playlist, validate_youtube_credentials

YOUTUBE_CREDENTIALS_PATH = '.youtube-upload-credentials.json'
ZOOM_CLIENT_ID = os.environ['EDGI_ZOOM_CLIENT_ID']
ZOOM_CLIENT_SECRET = os.environ['EDGI_ZOOM_CLIENT_SECRET']
ZOOM_ACCOUNT_ID = os.environ['EDGI_ZOOM_ACCOUNT_ID']

def is_truthy(x): return x.lower() in ['true', '1', 'y', 'yes']
ZOOM_DELETE_AFTER_UPLOAD = is_truthy(os.environ.get('EDGI_ZOOM_DELETE_AFTER_UPLOAD', ''))
DRY_RUN = is_truthy(os.environ.get('EDGI_DRY_RUN', ''))

MEETINGS_TO_RECORD = ['EDGI Community Standup']
DEFAULT_YOUTUBE_PLAYLIST = 'Uploads from Zoom'
DEFAULT_YOUTUBE_CATEGORY = 'Science & Technology'
DEFAULT_VIDEO_LICENSE = 'creativeCommon'
DO_FILTER = False

client = ZoomClient(ZOOM_CLIENT_ID, ZOOM_CLIENT_SECRET, ZOOM_ACCOUNT_ID)
# Ignore users with names that match these patterns when determining if a
# meeting has any participants and its recordings should be preserved.
ZOOM_IGNORE_USER_NAMES = (
# The otter.ai notetaker bot is present in most meetings.
re.compile(r'Otter\.ai', re.I),
)


def is_truthy(x):
    """Return True when the string *x* spells an affirmative value.

    The comparison is case-insensitive; the accepted spellings are
    'true', '1', 'y' and 'yes'. Anything else (including '') is False.
    """
    normalized = x.lower()
    return normalized in ('true', '1', 'y', 'yes')


ZOOM_DELETE_AFTER_UPLOAD = is_truthy(os.environ.get('EDGI_ZOOM_DELETE_AFTER_UPLOAD', ''))
DRY_RUN = is_truthy(os.environ.get('EDGI_DRY_RUN', ''))


class ZoomError(Exception):
    """Exception built from an HTTP response returned by the Zoom API.

    The exception message combines a human-readable summary, the decoded
    response body (when it is JSON), the HTTP status code, and a pointer to
    the Zoom API documentation.
    """

    def __init__(self, response, message=None):
        """Build the error message from *response*.

        Args:
            response: An object with a ``status_code`` attribute and a
                ``json()`` method (e.g. a ``requests.Response``).
            message: Optional summary. When omitted, the ``message`` field of
                the response body is used, falling back to a generic text.
        """
        try:
            data = response.json()
        except Exception:
            # Best effort: the body may be empty or not JSON at all.
            data = {}

        # A JSON body that is not an object (list, string, ...) has no
        # `message` field to pop; keep it for the report under its own key.
        if not isinstance(data, dict):
            data = {'body': data}

        if not message:
            message = data.pop('message', 'Zoom API error!')

        data['http_status'] = response.status_code
        full_message = f'{message} ({data!r}) Check the docs for details: https://developers.zoom.us/docs/api/.'
        # `super` has to be *called* to get a bound proxy; `super.__init__(...)`
        # invokes the descriptor on the `super` type itself and raises TypeError.
        super().__init__(full_message)

    @classmethod
    def is_error(cls, response):
        """Return True when the response's HTTP status is 400 or above."""
        return response.status_code >= 400

    @classmethod
    def raise_if_error(cls, response, message=None):
        """Raise an instance of this class if *response* is an error response."""
        if cls.is_error(response):
            raise cls(response, message)

    @classmethod
    def parse_or_raise(cls, response, message=None) -> Dict:
        """Return the decoded JSON body, raising first if *response* is an error."""
        cls.raise_if_error(response, message)
        return response.json()


def fix_date(date_string: str) -> str:
    """Insert a '.0' fractional-seconds part before the first 'Z' of a timestamp.

    For example '2024-08-29T12:00:00Z' becomes '2024-08-29T12:00:00.0Z'.
    A string that contains no 'Z' is returned unchanged.
    """
    index = date_string.find('Z')
    if index == -1:
        # `find` returns -1 when there is no 'Z'; slicing with it would splice
        # '.0' in front of the last character and corrupt the value.
        return date_string

    return f'{date_string[:index]}.0{date_string[index:]}'

def pretty_date(date_string):

def pretty_date(date_string: str) -> str:
    """Format a '%Y-%m-%dT%H:%M:%SZ' timestamp as e.g. 'Aug 5, 2024'.

    Raises:
        ValueError: If *date_string* does not match the expected format.
    """
    parsed = datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%SZ')
    # Use the `day` attribute for the unpadded day number: the `%-d`
    # directive is a platform-specific strftime extension and is not
    # available everywhere.
    return f'{parsed:%b} {parsed.day}, {parsed:%Y}'

def download_file(url, download_path, query=None):
r = requests.get(url, params=query, stream=True)

def download_zoom_file(client: ZoomClient, url: str, download_directory: str) -> str:
    """Download a Zoom recording file into *download_directory*.

    The request is authorized with the client's bearer token. The file name is
    taken from the last path segment of the final (post-redirect) URL. If a
    file with that name already exists in the directory, nothing is written.

    Args:
        client: Zoom client whose ``config['token']`` holds the access token.
        url: Download URL of the recording file.
        download_directory: Existing directory to save the file in.

    Returns:
        The path of the file on disk (whether just downloaded or pre-existing).

    Raises:
        requests.HTTPError: If the server responds with an error status.
        KeyError: If the client no longer exposes ``config['token']``.
    """
    # Note the token info in the client isn't really *public*, but it's
    # not explicitly private, either. Use `config[]` syntax instead of
    # `config.get()` so we get an exception if things have changed and
    # this data is no longer available.
    token = client.config['token']

    # The context manager releases the streamed connection on every path,
    # including errors and the "already downloaded" case.
    with requests.get(url, stream=True, headers={'Authorization': f'Bearer {token}'}) as r:
        r.raise_for_status()
        filename = urlparse(r.url).path.split('/')[-1]
        filepath = os.path.join(download_directory, filename)
        if not os.path.exists(filepath):
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)

    # Always hand back the path: callers pass it on to further processing.
    return filepath


def meeting_had_no_participants(client: ZoomClient, meeting: Dict) -> bool:
    """Report whether every participant of a past meeting is an ignored user.

    Looks up the participants of the meeting identified by ``meeting['uuid']``
    and returns True when each participant's name matches at least one
    pattern in ``ZOOM_IGNORE_USER_NAMES``. An empty participant list also
    yields True.

    Raises:
        ZoomError: If the participants request returns an error response.
    """
    response = client.past_meeting.get_participants(meeting_id=meeting['uuid'])
    attendees = ZoomError.parse_or_raise(response)['participants']

    for attendee in attendees:
        ignorable = False
        for pattern in ZOOM_IGNORE_USER_NAMES:
            if pattern.search(attendee['name']):
                ignorable = True
                break
        if not ignorable:
            # Someone who is not on the ignore list attended.
            return False

    return True


def video_has_audio(file_path: str) -> bool:
    """Detect whether a video file has a non-silent audio track."""
    command = [
        'ffmpeg',
        '-i', file_path,
        # The `ebur128=peak` filter reports the peak loudness level of the audio.
        '-af', 'ebur128=peak=true',
        '-f', 'null',
        '-',
    ]
    # ffmpeg's report is captured from both streams merged together.
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output = completed.stdout

    # No audio track.
    if b'audio:0kib' in output.lower():
        return False

    # Silent audio. Note that this won't handle things like the low hiss of an
    # empty room, which will report some low decibel level instead of `-inf`.
    # In practice, this covers Zoom recordings where a mic was never turned on.
    # Docs: https://ffmpeg.org/ffmpeg-filters.html#ebur128-1
    peak_is_silent = re.search(rb'Peak:\s+-inf', output) is not None
    return not peak_is_silent


def main():
if DRY_RUN:
print('⚠️ This is a dry run! Videos will not actually be uploaded.\n')
Expand All @@ -93,96 +173,108 @@ def main():
print('Please use `python scripts/auth.py` to re-authorize.')
return sys.exit(1)

zoom = ZoomClient(ZOOM_CLIENT_ID, ZOOM_CLIENT_SECRET, ZOOM_ACCOUNT_ID)

# Official meeting recordings we will upload belong to the account owner.
zoom_user_id = zoom.user.list(role_id=ZOOM_ROLES['owner']).json()['users'][0]['id']

with tempfile.TemporaryDirectory() as tmpdirname:
print('Creating tmp dir: ' + tmpdirname)
meetings = client.recording.list(user_id=user_id).json()['meetings']
print(f'Creating tmp dir: {tmpdirname}\n')

meetings = ZoomError.parse_or_raise(zoom.recording.list(user_id=zoom_user_id))['meetings']
meetings = sorted(meetings, key=lambda m: m['start_time'])
# Filter recordings less than 1 minute
meetings = filter(lambda m: m['duration'] > 1, meetings)
for meeting in meetings:
print(f'Processing meeting: {meeting["topic"]} from {meeting["start_time"]}')
print(f'Processing meeting: {meeting["topic"]} from {meeting["start_time"]} (ID: "{meeting['uuid']}")')

# 3. filter by criteria (no-op for now)
if meeting['topic'] not in MEETINGS_TO_RECORD and DO_FILTER:
print(' Skipping...')
print(' Skipping: meeting not in topic list.')
continue

if meeting_had_no_participants(zoom, meeting):
print(' Deleting recording: nobody attended this meeting.')
if not DRY_RUN:
response = zoom.recording.delete(meeting_id=meeting['uuid'], action='trash')
if response.status_code < 300:
print(' 🗑️ Deleted recording.')
else:
print(f' ❌ {ZoomError(response)}')
continue

videos = [file for file in meeting['recording_files']
if file['file_type'].lower() == 'mp4']

if len(videos) == 0:
print(f' No videos to upload: {meeting["topic"]}')
print(' 🔹 Skipping: no videos for meeting')
continue
elif any((file['file_size'] == 0 for file in videos)):
print(f' Meeting still processing: {meeting["topic"]}')
print(' 🔹 Skipping: meeting still processing')
continue

print(' Recording is permitted for upload!')
print(f' {len(videos)} videos to upload...')
for file in videos:
url = file['download_url']
print(f' Download from {url}...')
# Note the token info in the client isn't really *public*, but it's
# not explicitly private, either. Use `config[]` syntax instead of
# `config.get()` so we get an exception if things have changed and
# this data is no longer available.
filepath = download_file(url,
tmpdirname,
query={"access_token": client.config["token"]})

recording_date = fix_date(meeting['start_time'])
title = f'{meeting["topic"]} - {pretty_date(meeting["start_time"])}'

print(f' Uploading {filepath}\n {title=}\n {recording_date=}')
if not DRY_RUN:
video_id = upload_video(youtube,
filepath,
title=title,
category=VIDEO_CATEGORY_IDS["Science & Technology"],
license=DEFAULT_VIDEO_LICENSE,
recording_date=recording_date,
privacy_status='unlisted')

# Add all videos to default playlist
print(' Adding to main playlist: Uploads from Zoom')
if not DRY_RUN:
add_video_to_playlist(youtube, video_id, title=DEFAULT_YOUTUBE_PLAYLIST, privacy='unlisted')
print(f' Download from {url}...')
filepath = download_zoom_file(zoom, url, tmpdirname)

# Add to additional playlists
playlist_name = ''
if any(x in meeting['topic'].lower() for x in ['web mon', 'website monitoring', 'wm']):
playlist_name = 'Website Monitoring'
if video_has_audio(filepath):
recording_date = fix_date(meeting['start_time'])
title = f'{meeting["topic"]} - {pretty_date(meeting["start_time"])}'

if 'data together' in meeting['topic'].lower():
playlist_name = 'Data Together'
print(f' Uploading {filepath}\n {title=}\n {recording_date=}')
if not DRY_RUN:
video_id = upload_video(youtube,
filepath,
title=title,
category=VIDEO_CATEGORY_IDS["Science & Technology"],
license=DEFAULT_VIDEO_LICENSE,
recording_date=recording_date,
privacy_status='unlisted')

if 'community call' in meeting['topic'].lower():
playlist_name = 'Community Calls'
# Add all videos to default playlist
print(' Adding to main playlist: Uploads from Zoom')
if not DRY_RUN:
add_video_to_playlist(youtube, video_id, title=DEFAULT_YOUTUBE_PLAYLIST, privacy='unlisted')

if 'edgi introductions' in meeting['topic'].lower():
playlist_name = 'EDGI Introductions'
# Add to additional playlists
playlist_name = ''
if any(x in meeting['topic'].lower() for x in ['web mon', 'website monitoring', 'wm']):
playlist_name = 'Website Monitoring'

if 'all-edgi' in meeting['topic'].lower():
playlist_name = 'All-EDGI Meetings'
if 'data together' in meeting['topic'].lower():
playlist_name = 'Data Together'

if playlist_name:
print(f' Adding to call playlist: {playlist_name}')
if not DRY_RUN:
add_video_to_playlist(youtube, video_id, title=playlist_name, privacy='unlisted')
if 'community call' in meeting['topic'].lower():
playlist_name = 'Community Calls'

if 'edgi introductions' in meeting['topic'].lower():
playlist_name = 'EDGI Introductions'

if 'all-edgi' in meeting['topic'].lower():
playlist_name = 'All-EDGI Meetings'

if playlist_name:
print(f' Adding to call playlist: {playlist_name}')
if not DRY_RUN:
add_video_to_playlist(youtube, video_id, title=playlist_name, privacy='unlisted')

# TODO: save the chat log transcript in a comment on the video.
else:
print(' Skipping upload: video was silent (no mics were on).')

if ZOOM_DELETE_AFTER_UPLOAD and not DRY_RUN:
# Just delete the video for now, since that takes the most storage space.
# We should save the chat log transcript in a comment on the video.

# We're using the zoom api directly instead of zoomus, because zoomus only implements
# deleting all recorded files related to the meeting using the v2 API,
# while we still want to retain the audio and chat files for backup.
url = f'https://api.zoom.us/v2/meetings/{file["meeting_id"]}/recordings/{file["id"]}'
querystring = {"action":"trash"}
headers = {'authorization': f'Bearer {client.config["token"]}'}
response = requests.request("DELETE", url, headers=headers, params=querystring)
response = zoom.recording.delete_single_recording(
meeting_id=file['meeting_id'],
recording_id=file['id'],
action='trash'
)
if response.status_code == 204:
print(f' Deleted {file["file_type"]} file from Zoom for recording: {meeting["topic"]}')
print(f' 🗑️ Deleted {file["file_type"]} file from Zoom for recording: {meeting["topic"]}')
else:
print(f' The file could not be deleted. We received this response: {response.status_code}. Please check https://marketplace.zoom.us/docs/api-reference/zoom-api/cloud-recording/recordingdeleteone for what that could mean.')
print(f' ❌ {ZoomError(response)}')


if __name__ == '__main__':
Expand Down