# bbc_le_downloader.py

#!/usr/bin/python3

import requests
from bs4 import BeautifulSoup
import re
import os
import sys
from datetime import datetime

# Create the index file on first run. It records the URL of every episode
# that has already been downloaded so that later runs can skip them.
if not os.path.exists('./.podcast_index'):
    # One handle in 'w' mode both creates the file and writes the header;
    # a second nested handle in append mode is not needed.
    with open('./.podcast_index', 'w') as index_file:
        index_file.write(
            'Generated By BBC Podcast Downloader V1 - Shyamjos.com (c) 2018 \n'
            '-------------------------------------------------------------\n'
            ' Index of previously Downloaded episodes \n'
            ' Note: Donot delete this file unless you want to redownload them all \n'
            '-------------------------------------------------------------\n'
        )

# Default downloads page (the "6 Minute English" programme).
url="https://www.bbc.co.uk/programmes/p02pc9tn/episodes/downloads"


def download_podcast(url,name):
    """Download every episode on a downloads page that is not yet indexed.

    Fetches ``url``, looks at each ``<a>`` tag whose ``href`` is an http
    link to an mp3 file, skips the low-quality variants and any link that
    already appears in ``.podcast_index``, writes each remaining file to the
    current directory and appends its URL (with a timestamp) to the index.

    Args:
        url: Address of the programme's downloads page.
        name: Programme title; used only in the progress messages.

    Raises:
        requests.HTTPError: If the downloads page or an episode request
            returns an error status code.
    """
    index_path = '.podcast_index'

    page = requests.get(url)
    # Without this check an error page (which has no mp3 links) would be
    # reported as "Up To Date".
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    # Read the index once instead of re-opening it for every link. A missing
    # index simply means nothing has been downloaded yet.
    try:
        with open(index_path) as index_file:
            downloaded = index_file.read()
    except FileNotFoundError:
        downloaded = ''

    # Raw string so that "\." reaches the regex engine as an escaped dot.
    for a in soup.find_all('a', href=re.compile(r'http.*\.mp3')):
        href = a['href']
        # Skip the low quality version and already downloaded podcasts.
        if 'audio-nondrm-download-low' in href or href in downloaded:
            continue

        # Fall back to the last URL segment when the tag has no usable
        # 'download' attribute.
        filename = _episode_filename(a.get('download') or href.rsplit('/', 1)[-1])
        print ("Downloading Podcast From:"+ ' ' + name + ' :' + filename)

        # The context manager releases the streamed connection even if the
        # transfer fails part-way through.
        with requests.get(href, stream=True) as response:
            # Throw an error for bad status codes.
            response.raise_for_status()
            with open(filename, 'wb') as handle:
                for block in response.iter_content(1024):
                    handle.write(block)

        # Record the episode only after the file was written completely.
        datestring = datetime.strftime(datetime.now(), '%Y/%m/%d_%H:%M:%S')
        entry = datestring + ' ' + href + "\n"
        with open(index_path, 'a') as index_file:
            index_file.write(entry)
        # Keep the in-memory copy current so a link repeated on the same
        # page is not fetched twice.
        downloaded += entry

    print("Up To Date:"+' ' + name)


def _episode_filename(download_attr):
    """Derive a local file name from a link's ``download`` attribute text.

    Spaces become underscores and everything up to the first comma is
    dropped. Only the first comma is used as the separator, so commas inside
    the remaining text are preserved. Path separators are replaced because
    the text comes from a remote page and is used as a local file name.
    """
    cleaned = download_attr.replace(' ', '_')
    _prefix, _comma, title = cleaned.partition(',')
    # No comma (or nothing after it): use the whole cleaned text instead.
    filename = title or cleaned
    for separator in ('/', '\\'):
        filename = filename.replace(separator, '_')
    return filename

# Programme title and downloads page, in the order used by "Download All".
_PROGRAMMES = (
    ("6 Minute English", "https://www.bbc.co.uk/programmes/p02pc9tn/episodes/downloads"),
    ("6 Minute Vocabulary", "https://www.bbc.co.uk/programmes/p02pc9xz/episodes/downloads"),
    ("6 Minute Grammar", "https://www.bbc.co.uk/programmes/p02pc9wq/episodes/downloads"),
)

# Menu choice -> programmes to download. Choice "5" downloads nothing and
# just exits.
_CHOICES = {
    "1": _PROGRAMMES[0:1],
    "2": _PROGRAMMES[1:2],
    "3": _PROGRAMMES[2:3],
    "4": _PROGRAMMES,
    "5": (),
}

_MENU = (
    "\n"
    "    1.Download BBC 6 Minute English Podcast\n"
    "    2.Download BBC 6 Minute Vocabulary\n"
    "    3.Download BBC 6 Minute Grammar \n"
    "    4.Download All (6 Minute English,Vocabulary,Grammar)\n"
    "    5.Exit/Quit\n"
    "    "
)


def main():
    """Show the menu, run the chosen download(s) and exit.

    A valid choice downloads the selected programme(s) and then exits with
    status 0. An unknown choice prints a message and shows the menu again.
    Empty input, or end-of-file on stdin, leaves the loop without
    downloading anything.
    """
    while True:
        print (_MENU)
        try:
            ans = input("What would you like to do? ")
        except EOFError:
            # Closed stdin is treated like empty input: stop quietly.
            break
        if not ans:
            break
        if ans in _CHOICES:
            for name, url in _CHOICES[ans]:
                download_podcast(url, name)
            sys.exit(0)
        print("\n Not Valid Choice Try again")


if __name__ == "__main__":
    main()