-
Notifications
You must be signed in to change notification settings - Fork 2
/
bbc_le_downloader.py
99 lines (83 loc) · 3.37 KB
/
bbc_le_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/python3
import requests
from bs4 import BeautifulSoup
import re
import os
import sys
from datetime import datetime
# Create the index file, with its explanatory header, the first time the
# script runs in this directory. The index records the URL of every episode
# already downloaded so that later runs can skip them.
if not os.path.exists('./.podcast_index'):
    # A single open in 'w' mode both creates the file and writes the header;
    # there is no need to create it first and then reopen it for appending.
    with open('./.podcast_index', 'w') as index_file:
        index_file.write(
            'Generated By BBC Podcast Downloader V1 - Shyamjos.com (c) 2018 \n'
            '-------------------------------------------------------------\n'
            ' Index of previously Downloaded episodes \n'
            ' Note: Donot delete this file unless you want to redownload them all \n'
            '-------------------------------------------------------------\n'
        )

# Default downloads page (the "6 Minute English" feed, same URL as menu option 1).
url = "https://www.bbc.co.uk/programmes/p02pc9tn/episodes/downloads"
def _episode_filename(link):
    """Return the local file name to use for an episode ``<a>`` tag.

    The name is taken from the tag's ``download`` attribute with spaces
    replaced by underscores, keeping everything after the first comma
    (presumably the part before the comma is the series name -- verify
    against the live page). If the attribute is missing, has no comma, or
    has nothing after the comma, a usable name is still produced instead of
    raising.
    """
    href = link['href']
    # Fall back to the last path segment of the URL when there is no
    # ``download`` attribute to name the file after.
    label = (link.get('download') or href.rsplit('/', 1)[-1]).replace(' ', '_')
    # Split on the FIRST comma only, so a title that itself contains commas
    # is kept whole (and keeps its extension) rather than being truncated.
    _, comma, title = label.partition(',')
    filename = title if comma and title else label
    # The name comes from remote HTML: never let it address another directory.
    return filename.replace('/', '_').replace(os.sep, '_')


def download_podcast(url, name):
    """Download every not-yet-downloaded episode linked from a downloads page.

    Fetches ``url``, scans its ``<a>`` tags for http(s) links containing
    ``.mp3``, skips the low-quality variants and any link already recorded in
    ``.podcast_index``, saves each remaining file into the current directory
    and appends a timestamped line with its URL to ``.podcast_index``.

    Args:
        url: Address of the page listing the downloadable episodes.
        name: Human-readable series name, used only in progress messages.

    Raises:
        requests.HTTPError: If the listing page or an episode download
            returns an error status code.
    """
    # Fetch the listing and fail loudly on an HTTP error instead of parsing
    # an error page and then reporting "Up To Date".
    listing = requests.get(url)
    listing.raise_for_status()
    soup = BeautifulSoup(listing.text, 'html.parser')

    # Read the index once, through a context manager, rather than reopening
    # (and never closing) it for every link on the page.
    try:
        with open('.podcast_index') as index_file:
            already_downloaded = index_file.read()
    except FileNotFoundError:
        # No index yet: nothing has been downloaded; it is created on append.
        already_downloaded = ''

    # Extract MP3 links from the <a> tags.
    for a in soup.find_all('a', href=re.compile(r'http.*\.mp3')):
        href = a['href']
        # Skip the low-quality version and already downloaded podcasts.
        if 'audio-nondrm-download-low' in href or href in already_downloaded:
            continue

        filename = _episode_filename(a)
        print("Downloading Podcast From:" + ' ' + name + ' :' + filename)

        # Stream the file to disk in 1 KiB blocks; the context manager
        # releases the connection even if the download fails part-way.
        with requests.get(href, stream=True) as response:
            # Throw an error for bad status codes.
            response.raise_for_status()
            with open(filename, 'wb') as handle:
                for block in response.iter_content(1024):
                    handle.write(block)

        # Record the downloaded URL in .podcast_index, and in the in-memory
        # copy so a link repeated later on the same page is also skipped.
        datestring = datetime.strftime(datetime.now(), '%Y/%m/%d_%H:%M:%S')
        entry = datestring + ' ' + href + "\n"
        with open('.podcast_index', 'a') as index_file:
            index_file.write(entry)
        already_downloaded += entry

    print("Up To Date:" + ' ' + name)
# Interactive menu. Each feed is a (series name, downloads page URL) pair and
# each menu choice maps to the feeds it downloads, in order.
_ENGLISH = ("6 Minute English",
            "https://www.bbc.co.uk/programmes/p02pc9tn/episodes/downloads")
_VOCABULARY = ("6 Minute Vocabulary",
               "https://www.bbc.co.uk/programmes/p02pc9xz/episodes/downloads")
_GRAMMAR = ("6 Minute Grammar",
            "https://www.bbc.co.uk/programmes/p02pc9wq/episodes/downloads")

_MENU_CHOICES = {
    "1": (_ENGLISH,),
    "2": (_VOCABULARY,),
    "3": (_GRAMMAR,),
    "4": (_ENGLISH, _VOCABULARY, _GRAMMAR),
    "5": (),  # Exit/Quit: nothing to download
}

_MENU_TEXT = """
1.Download BBC 6 Minute English Podcast
2.Download BBC 6 Minute Vocabulary
3.Download BBC 6 Minute Grammar
4.Download All (6 Minute English,Vocabulary,Grammar)
5.Exit/Quit
"""

ans = True
# Keep prompting until a valid choice is made. An empty answer is falsy, so
# pressing Enter on its own ends the loop without a message.
while ans:
    print(_MENU_TEXT)
    ans = input("What would you like to do? ")
    if ans in _MENU_CHOICES:
        # Run the selected downloads (none for "5"), then exit successfully.
        for name, url in _MENU_CHOICES[ans]:
            download_podcast(url, name)
        sys.exit(0)
    if ans != "":
        print("\n Not Valid Choice Try again")