From a3755a8cfcae4d2d86ee41aad238cbeccd14fd89 Mon Sep 17 00:00:00 2001 From: Xi Bai Date: Tue, 23 Aug 2022 10:25:30 +0100 Subject: [PATCH 01/20] prepare for the new release (#74) * support py310 and tensorflow 2.8 --- Pipfile | 14 +++++++------- README.md | 5 +++-- docker/docker-compose.yml | 6 ------ requirements-dev.txt | 2 +- requirements-translation.txt | 2 +- requirements.txt | 10 +++++----- subaligner/__init__.py | 2 ++ subaligner/_version.py | 2 +- subaligner/trainer.py | 4 ++-- subaligner/translator.py | 4 ++-- tests/integration/feature/subaligner_train.feature | 2 +- 11 files changed, 25 insertions(+), 28 deletions(-) diff --git a/Pipfile b/Pipfile index 871c0b0..55c4e39 100644 --- a/Pipfile +++ b/Pipfile @@ -13,7 +13,7 @@ snakeviz = "==2.1.0" line-profiler = "==3.0.2" scikit-build = "==0.11.1" radish-bdd = "~=0.13.3" -pex = "==2.1.15" +pex = "<=2.1.80" mypy = "==0.790" parameterized = "==0.8.1" pylint = "~=2.8.2" @@ -45,7 +45,7 @@ google-auth-oauthlib = "==0.4.2" google-pasta = "~=0.2" graphviz = "==0.8.3" HeapDict = "==1.0.0" -h5py = "~=2.10.0" +h5py = "<=3.6.0" html5lib = "==1.0b9" hyperopt = "==0.2.4" idna = "==2.8" @@ -62,7 +62,7 @@ Markdown = "==2.6.11" mccabe = "==0.6.1" msgpack-python = "==0.5.6" numba = ">=0.50.0" -numpy = "<1.23.0" +numpy = "<1.24.0" oauthlib = "==3.1.0" pbr = "==4.0.2" pluggy = "==0.13.1" @@ -77,7 +77,7 @@ pylint = "==2.5.0" pyparsing = "==2.2.0" pyprof2calltree = "==1.4.3" pysrt = "==1.1.1" -pysubs2 = "==0.2.4" +pysubs2 = "<=1.4.2" pystack-debugger = "==0.8.0" python-dateutil = "==2.7.2" pytz = "==2018.4" @@ -85,17 +85,17 @@ PyYAML = ">=4.2b1" requests = "~=2.25.1" requests-oauthlib = "==1.3.0" rsa = "==4.7" -scipy = "~=1.5.4" +scipy = "<=1.8.1" scikit-learn = ">=0.19.1" sentencepiece = "~=0.1.95" setuptools = ">=41.0.0" six = "~=1.15.0" tblib = "==1.3.2" -tensorflow = ">=1.15.5,<2.8" +tensorflow = ">=1.15.5,<2.9" termcolor = "==1.1.0" toml = "==0.10.0" toolz = "==0.9.0" -torch = "~=1.8.1" +torch = "<=1.12.0" tornado = "==5.1.0" transformers = "~=4.5.1" typing-extensions = "~=3.7.0" diff --git a/README.md b/README.md index 6cc2432..634adac 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ [![Build Status](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml/badge.svg?branch=master)](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml?query=branch%3Amaster) ![Codecov](https://img.shields.io/codecov/c/github/baxtree/subaligner) -[![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) +[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) [![Documentation Status](https://readthedocs.org/projects/subaligner/badge/?version=latest)](https://subaligner.readthedocs.io/en/latest/?badge=latest) [![GitHub 
license](https://img.shields.io/github/license/baxtree/subaligner)](https://github.com/baxtree/subaligner/blob/master/LICENSE) [![PyPI](https://badge.fury.io/py/subaligner.svg)](https://badge.fury.io/py/subaligner) -[![Docker](https://img.shields.io/docker/cloud/build/baxtree/subaligner?label=Docker&style=flat)](https://hub.docker.com/r/baxtree/subaligner/builds) +[![Docker Build](https://img.shields.io/docker/cloud/build/baxtree/subaligner?label=Docker&style=flat)](https://hub.docker.com/r/baxtree/subaligner/builds) +[![Docker Pulls](https://img.shields.io/docker/pulls/baxtree/subaligner)](https://hub.docker.com/r/baxtree/subaligner) [![Citation](https://zenodo.org/badge/228440472.svg)](https://doi.org/10.5281/zenodo.5603083) ## Supported Formats diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index b127ea5..424665b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -7,12 +7,6 @@ services: dockerfile: Dockerfile-CentOS7 image: baxtree/subaligner:${SUBALIGNER_VERSION}.el7 - subaligner-centos8: - build: - context: ./ - dockerfile: Dockerfile-CentOS8 - image: baxtree/subaligner:${SUBALIGNER_VERSION}.el8 - subaligner-ubuntu18: build: context: ./ diff --git a/requirements-dev.txt b/requirements-dev.txt index 55e0085..d6b5f10 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,7 +7,7 @@ snakeviz==2.1.0 line-profiler==3.1.0 scikit-build==0.11.1 radish-bdd~=0.13.3 -pex==2.1.34 +pex<=2.1.80 mypy==0.931 types-requests==2.27.9 types-setuptools==57.4.9 diff --git a/requirements-translation.txt b/requirements-translation.txt index 1fa5e3c..a400733 100644 --- a/requirements-translation.txt +++ b/requirements-translation.txt @@ -1,4 +1,4 @@ pycountry~=20.7.3 sentencepiece~=0.1.95 -torch~=1.8.1 +torch<1.13.0 transformers~=4.5.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ff3a994..ee3ba41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ google-auth-oauthlib==0.4.2 google-pasta~=0.2 graphviz==0.8.3 HeapDict==1.0.0 -h5py~=3.1.0 +h5py<=3.6.0 html5lib==1.0b9 hyperopt==0.2.4 idna==2.8 @@ -38,7 +38,7 @@ mccabe==0.6.1 networkx>=2.5.1 msgpack-python==0.5.6 numba>=0.50.0 -numpy<1.23.0 +numpy<1.24.0 oauthlib==3.1.0 pbr==4.0.2 pluggy==0.13.1 @@ -51,19 +51,19 @@ pydot-ng==1.0.0 pydotplus==2.0.2 pyprof2calltree==1.4.3 pysrt==1.1.1 -pysubs2==0.2.4 +pysubs2<=1.4.2 pystack-debugger==0.8.0 pytz==2018.4 PyYAML>=4.2b1 requests~=2.25.1 requests-oauthlib==1.3.0 rsa==4.7 -scipy~=1.5.4 +scipy<=1.8.1 scikit-learn~=0.24.2 setuptools>=41.0.0 six~=1.15.0 tblib==1.3.2 -tensorflow>=1.15.5,<2.8 +tensorflow>=1.15.5,<2.9 termcolor==1.1.0 toml==0.10.0 toolz==0.9.0 diff --git a/subaligner/__init__.py b/subaligner/__init__.py index f748777..e3206b2 100644 --- a/subaligner/__init__.py +++ b/subaligner/__init__.py @@ -1,5 +1,7 @@ +import os import multiprocessing as mp from ._version import __version__ __all__ = ["__version__"] mp.set_start_method("spawn", force=True) +os.environ["KMP_WARNINGS"] = "0" diff --git a/subaligner/_version.py b/subaligner/_version.py index cdd5ff7..0f6fa4c 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.2.4" +__version__ = "0.2.5" diff --git a/subaligner/trainer.py b/subaligner/trainer.py index 95f5a6b..eb0c25e 100644 --- a/subaligner/trainer.py +++ b/subaligner/trainer.py @@ -315,8 +315,8 @@ def __extract_data_and_label_from_avs( train_data = [x for x in train_data if x is not None] labels = [x for x in 
labels if x is not None] - train_data = np.concatenate(train_data) - labels = np.concatenate(labels) + train_data: np.ndarray = np.concatenate(train_data) # type: ignore + labels: np.ndarray = np.concatenate(labels) # type: ignore self.__LOGGER.debug( "Data and labels extracted after {} seconds".format( str(datetime.datetime.now() - extraction_start) diff --git a/subaligner/translator.py b/subaligner/translator.py index 78c548c..0cc504e 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -128,8 +128,8 @@ def translate(self, subs: List[SubRipItem]) -> List[SubRipItem]: num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE) self.__LOGGER.info("Translating %s subtitle cue(s)..." % len(src_texts)) for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches): - tokenizer = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) - translated = self.lang_model.generate(**tokenizer) + input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) + translated = self.lang_model.generate(**input_ids) translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated]) for index in range(len(new_subs)): new_subs[index].text = translated_texts[index] diff --git a/tests/integration/feature/subaligner_train.feature b/tests/integration/feature/subaligner_train.feature index 68cb446..2fcda5a 100644 --- a/tests/integration/feature/subaligner_train.feature +++ b/tests/integration/feature/subaligner_train.feature @@ -53,7 +53,7 @@ Feature: Subaligner CLI Then it exits with code "21" @train @embedded-subtitle - Scenario: Test training on video and embedded subtitles + Scenario: Test training on video with embedded subtitles Given I have an audiovisual file directory "av_embedded" And I want to save the training output in directory "output" When I run the subaligner_train with subtitle selector "embedded:stream_index=0,file_extension=srt" and the following options From 845878ee97b062ade2bbacdb6dfd457bd5893e8f Mon Sep 17 00:00:00 2001 From: baxtree Date: Tue, 23 Aug 2022 10:36:23 +0100 Subject: [PATCH 02/20] update dependencies --- Pipfile | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 55c4e39..5f09cf8 100644 --- a/Pipfile +++ b/Pipfile @@ -39,7 +39,7 @@ Cython = "~=0.29.22" dask = "<2022.1.0" decorator = "==4.3.0" distributed = "==1.13.0" -filelock = "==3.0.12" +filelock = "<4.0.0" google-auth = "==1.27.0" google-auth-oauthlib = "==0.4.2" google-pasta = "~=0.2" diff --git a/requirements.txt b/requirements.txt index ee3ba41..5d2a4a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ Cython~=0.29.22 dask<2022.1.0 decorator==4.3.0 distributed==1.13.0 -filelock==3.0.12 +filelock<4.0.0 google-auth==1.27.0 google-auth-oauthlib==0.4.2 google-pasta~=0.2 From db44020e3989167eba8031730ce123f9517b9367 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 27 Feb 2023 09:43:49 +0000 Subject: [PATCH 03/20] issue-75 utilise opus tc big models for translation --- Pipfile | 2 +- requirements-translation.txt | 2 +- subaligner/translator.py | 84 +++++++++++++++----- tests/integration/feature/subaligner.feature | 1 + 4 files changed, 66 insertions(+), 23 deletions(-) diff --git a/Pipfile b/Pipfile index 5f09cf8..4abbaad 100644 --- a/Pipfile +++ b/Pipfile @@ -97,7 +97,7 @@ toml = "==0.10.0" toolz = "==0.9.0" torch = "<=1.12.0" tornado = "==5.1.0" -transformers = "~=4.5.1" 
+transformers = "<4.27.0" typing-extensions = "~=3.7.0" urllib3 = "~=1.26.5" Werkzeug = ">=0.15.3" diff --git a/requirements-translation.txt b/requirements-translation.txt index a400733..e12de75 100644 --- a/requirements-translation.txt +++ b/requirements-translation.txt @@ -1,4 +1,4 @@ pycountry~=20.7.3 sentencepiece~=0.1.95 torch<1.13.0 -transformers~=4.5.1 \ No newline at end of file +transformers<4.27.0 \ No newline at end of file diff --git a/subaligner/translator.py b/subaligner/translator.py index 0cc504e..28277fa 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -16,6 +16,7 @@ class Translator(metaclass=Singleton): __TENSOR_TYPE = "pt" __OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}" + __OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}" __OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}" __TRANSLATING_BATCH_SIZE = 10 __LANGUAGE_CODE_MAPPER = { @@ -140,59 +141,100 @@ def __initialise_model(self, src_lang: str, tgt_lang: str) -> None: src_lang = Translator.normalise_single(src_lang) tgt_lang = Translator.normalise_single(tgt_lang) src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang) + + if self.__download_mt_model(src_lang, tgt_lang): + return + elif self.__download_mt_tc_big_model(src_lang, tgt_lang): + return + elif self.__download_tatoeba_model(src_lang, tgt_lang): + return + else: + message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang) + self.__LOGGER.error(message) + raise NotImplementedError(message) + + def __download_mt_model(self, src_lang: str, tgt_lang: str) -> bool: try: mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_MT.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_MT.format(src_lang, tgt_lang) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) + return False + + def __download_mt_tc_big_model(self, src_lang: str, tgt_lang: str) -> bool: + try: + mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) + self.__download(mt_tc_model_name) + return True + except OSError: + self.__log_and_back_off(mt_tc_model_name) + try: + mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) + self.__download(mt_tc_model_name) + return True + except OSError: + self.__log_and_back_off(mt_tc_model_name) + try: + mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) + self.__download(mt_tc_model_name) + return True + except OSError: + self.__log_and_back_off(mt_tc_model_name) + try: + mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, tgt_lang) + self.__download(mt_tc_model_name) + 
return True + except OSError: + self.__log_and_back_off(mt_tc_model_name) + return False + + def __download_tatoeba_model(self, src_lang: str, tgt_lang: str) -> bool: try: mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: self.__log_and_back_off(mt_model_name) try: mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, tgt_lang) - self.__download_mt_model(mt_model_name) - return + self.__download(mt_model_name) + return True except OSError: - self.__LOGGER.debug("Cannot download the MT model %s" % mt_model_name) - message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang) - self.__LOGGER.error(message) - raise NotImplementedError(message) + self.__log_and_back_off(mt_model_name) + return False - def __download_mt_model(self, mt_model_name: str) -> None: + def __download(self, mt_model_name: str) -> None: self.__LOGGER.debug("Trying to download the MT model %s" % mt_model_name) self.tokenizer = MarianTokenizer.from_pretrained(mt_model_name) self.lang_model = MarianMTModel.from_pretrained(mt_model_name) diff --git a/tests/integration/feature/subaligner.feature b/tests/integration/feature/subaligner.feature index acefc16..2858b79 100644 --- a/tests/integration/feature/subaligner.feature +++ b/tests/integration/feature/subaligner.feature @@ -230,6 +230,7 @@ Feature: Subaligner CLI | subaligner | single | "test.srt" | eng,zho | "test_aligned.srt" | | subaligner | dual | "test.srt" | eng,spa | "test_aligned.srt" | | subaligner | script | "test_plain.txt" | eng,ita | "test_aligned.srt" | + | subaligner | script | "test_plain.txt" | eng,por | "test_aligned.srt" | | subaligner_1pass | | "test.srt" | eng,fra | "test_aligned.srt" | | subaligner_2pass | | "test.srt" | eng,deu | "test_aligned.srt" | From 270e92df67437a008e66ff0f3973a103d6e19d4b Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 27 Feb 2023 12:25:51 +0000 Subject: [PATCH 04/20] specify workflow target branches --- .github/workflows/ci-pipeline.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index 8a6fe6e..b0a3227 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -1,9 +1,11 @@ name: ci pipeline on: - - push - - pull_request - - workflow_dispatch + push: + branches: [ master, development ] + pull_request: + branches: [ master, development ] + workflow_dispatch: jobs: main: From 1baa9a9f9c3b6b7ac6527b187da6749c4638cdb5 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Mar 2023 09:43:42 +0000 Subject: [PATCH 05/20] generate subtitle via transcription --- .github/workflows/ci-pipeline.yml | 2 +- Makefile | 16 +-- Pipfile | 3 +- README.md | 17 +-- docker/Dockerfile-ArchLinux | 2 +- ...ts-translation.txt => requirements-llm.txt | 3 +- 
setup.py | 11 +- site/source/acknowledgement.rst | 1 + site/source/index.rst | 4 + site/source/installation.rst | 2 +- site/source/usage.rst | 12 +- subaligner/__main__.py | 110 +++++++++++----- subaligner/exception.py | 4 + subaligner/predictor.py | 2 +- subaligner/subaligner_1pass/__main__.py | 2 +- subaligner/subaligner_2pass/__main__.py | 2 +- subaligner/subaligner_batch/__main__.py | 2 +- subaligner/subaligner_convert/__main__.py | 2 +- subaligner/subtitle.py | 15 +++ subaligner/transcriber.py | 118 ++++++++++++++++++ tests/integration/feature/subaligner.feature | 11 ++ tests/integration/radish/step.py | 14 +++ tests/subaligner/test_transcriber.py | 43 +++++++ 23 files changed, 335 insertions(+), 63 deletions(-) rename requirements-translation.txt => requirements-llm.txt (54%) create mode 100644 subaligner/transcriber.py create mode 100644 tests/subaligner/test_transcriber.py diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index b0a3227..169abcc 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip cat requirements.txt | xargs -L 1 pip install cat requirements-stretch.txt | xargs -L 1 pip install - cat requirements-translation.txt | xargs -L 1 pip install + cat requirements-llm.txt | xargs -L 1 pip install cat requirements-dev.txt | xargs -L 1 pip install cat requirements-site.txt | xargs -L 1 pip install pip install -e . --ignore-installed diff --git a/Makefile b/Makefile index 3bf1626..4c414da 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ install: .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ .$(PYTHON)/bin/pip install -e . --ignore-installed cp ./bin/subaligner_1pass .$(PYTHON)/bin/subaligner_1pass @@ -55,7 +55,7 @@ install-basic: .$(PYTHON)/bin/pip install -e '.' --no-cache-dir install-translation: - .$(PYTHON)/bin/pip install -e '.[translation]' --no-cache-dir + .$(PYTHON)/bin/pip install -e '.[llm]' --no-cache-dir install-stretch: .$(PYTHON)/bin/pip install -e '.[stretch]' --no-cache-dir @@ -82,7 +82,7 @@ test: .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install PYTHONPATH=. 
.$(PYTHON)/bin/python -m unittest discover -.$(PYTHON)/bin/pycodestyle subaligner tests examples misc bin/subaligner bin/subaligner_1pass bin/subaligner_2pass bin/subaligner_batch bin/subaligner_convert bin/subaligner_train bin/subaligner_tune setup.py --ignore=E203,E501,W503 --exclude="subaligner/lib" @@ -95,7 +95,7 @@ test-int: ## integration test .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install .$(PYTHON)/bin/pip install -e . --ignore-installed ( \ @@ -108,7 +108,7 @@ pydoc: clean-doc ## generate pydoc HTML documentation based on docstrings .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ .$(PYTHON)/bin/python -m pydoc -w subaligner; mv subaligner.html docs/index.html .$(PYTHON)/bin/python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs .$(PYTHON)/bin/python -m pydoc -w subaligner.exception; mv subaligner.exception.html docs @@ -131,7 +131,7 @@ coverage: ## check code coverage quickly with the default Python .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install .$(PYTHON)/bin/coverage run --source subaligner -m unittest discover .$(PYTHON)/bin/coverage report @@ -167,7 +167,7 @@ profile: .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-stretch.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - cat requirements-translation.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ + cat requirements-llm.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ .$(PYTHON)/bin/python -c "import misc.profiler; misc.profiler.generate_profiles()" .$(PYTHON)/bin/kernprof -v -l ./misc/profiler.py @@ -176,7 +176,7 @@ app: clean-wheels if [ ! 
-e ".$(PYTHON)" ]; then ~/.pyenv/versions/$(PYTHON)/bin/python3 -m venv .$(PYTHON); fi .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - .$(PYTHON)/bin/pip wheel --no-cache-dir --wheel-dir=./wheels -r requirements.txt -r requirements-stretch.txt -r requirements-translation.txt; \ + .$(PYTHON)/bin/pip wheel --no-cache-dir --wheel-dir=./wheels -r requirements.txt -r requirements-stretch.txt -r requirements-llm.txt; \ STRETCH_OFF=True .$(PYTHON)/bin/python setup.py bdist_wheel -d ./wheels; \ .$(PYTHON)/bin/pex subaligner==$(SUBALIGNER_VERSION) --repo=./wheels --platform $(PLATFORM) --no-pypi --no-build --python-shebang="/usr/bin/env python3" -e subaligner -o subaligner-$(PLATFORM).app; \ diff --git a/Pipfile b/Pipfile index 4abbaad..934cca0 100644 --- a/Pipfile +++ b/Pipfile @@ -64,6 +64,7 @@ msgpack-python = "==0.5.6" numba = ">=0.50.0" numpy = "<1.24.0" oauthlib = "==3.1.0" +openai-whisper = "==20230124" pbr = "==4.0.2" pluggy = "==0.13.1" psutil = "==5.6.7" @@ -95,7 +96,7 @@ tensorflow = ">=1.15.5,<2.9" termcolor = "==1.1.0" toml = "==0.10.0" toolz = "==0.9.0" -torch = "<=1.12.0" +torch = "<1.13.0" tornado = "==5.1.0" transformers = "<4.27.0" typing-extensions = "~=3.7.0" diff --git a/README.md b/README.md index 634adac..2ed868a 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ $ pip install subaligner ## Installation with Optional Packages Supporting Additional Features ``` -# Install dependencies for enabling translation +# Install dependencies for enabling translation and transcription -$ pip install 'subaligner[translation]' +$ pip install 'subaligner[llm]' ``` ``` # Install dependencies for enabling forced alignment @@ -118,6 +118,10 @@ $ subaligner -m single -v https://example.com/video.mp4 -s https://example.com/s $ subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt ``` ``` +# Generate subtitles by transcribing audiovisual files +$ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt +``` +``` # Alignment on segmented plain texts (double newlines as the delimiter) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt @@ -137,15 +141,11 @@ $ subaligner -m dual -v video.mkv -s embedded:stream_index=0 -o subtitle_aligned ``` ``` # Translative alignment with the ISO 639-3 language code pair (src,tgt) - -$ subaligner_1pass --languages -$ subaligner_1pass -v video.mp4 -s subtitle.srt -t src,tgt -$ subaligner_2pass --languages -$ subaligner_2pass -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner --languages $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt +$ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt ``` ``` # Shift subtitle manually by offset in seconds @@ -214,6 +214,7 @@ This tool wouldn't be possible without the following packages: [pysrt](https://github.com/byroot/pysrt) [pysubs2](https://github.com/tkarabela/pysubs2) [aeneas](https://www.readbeyond.it/aeneas/) -[transformers](https://huggingface.co/transformers/). +[transformers](https://huggingface.co/transformers/) +[openai-whisper](https://github.com/openai/whisper). Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback. 
diff --git a/docker/Dockerfile-ArchLinux b/docker/Dockerfile-ArchLinux index e927e20..8f1c49b 100644 --- a/docker/Dockerfile-ArchLinux +++ b/docker/Dockerfile-ArchLinux @@ -20,7 +20,7 @@ RUN ["/bin/bash", "-c", "pacman --noconfirm -Syu &&\ python -m pip install --upgrade pip &&\ python -m pip install wheel &&\ python -m pip install \"subaligner==${RELEASE_VERSION}\" &&\ - python -m pip install \"subaligner[translation]==${RELEASE_VERSION}\""] + python -m pip install \"subaligner[llm]==${RELEASE_VERSION}\""] COPY ./scripts/entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/requirements-translation.txt b/requirements-llm.txt similarity index 54% rename from requirements-translation.txt rename to requirements-llm.txt index e12de75..fbe39c8 100644 --- a/requirements-translation.txt +++ b/requirements-llm.txt @@ -1,4 +1,5 @@ pycountry~=20.7.3 sentencepiece~=0.1.95 torch<1.13.0 -transformers<4.27.0 \ No newline at end of file +transformers<4.27.0 +openai-whisper==20230124 \ No newline at end of file diff --git a/setup.py b/setup.py index dd82aea..909ee15 100644 --- a/setup.py +++ b/setup.py @@ -20,18 +20,19 @@ with open("requirements-site.txt") as docs_requirements_file: docs_requirements = docs_requirements_file.read().splitlines()[::-1] -with open("requirements-translation.txt") as translate_requirements_file: - translate_requirements = translate_requirements_file.read().splitlines()[::-1] +with open("requirements-llm.txt") as llm_requirements_file: + llm_requirements = llm_requirements_file.read().splitlines()[::-1] with open("requirements-dev.txt") as dev_requirements_file: dev_requirements = dev_requirements_file.read().splitlines()[::-1] EXTRA_DEPENDENCIES = { - "harmony": stretch_requirements + translate_requirements, - "dev": dev_requirements + stretch_requirements + translate_requirements + docs_requirements, + "harmony": stretch_requirements + llm_requirements, + "dev": dev_requirements + stretch_requirements + llm_requirements + docs_requirements, "docs": docs_requirements, "stretch": stretch_requirements, - "translation": translate_requirements, + "translation": llm_requirements, # for backward compatibility and will be deprecated with "llm" + "llm": llm_requirements, } setup(name="subaligner", diff --git a/site/source/acknowledgement.rst b/site/source/acknowledgement.rst index 437da30..108c6c3 100644 --- a/site/source/acknowledgement.rst +++ b/site/source/acknowledgement.rst @@ -12,5 +12,6 @@ Acknowledgement - `pysubs2 `_ - `aeneas `_ - `transformers `_ + - `openai-whisper `_ Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback. diff --git a/site/source/index.rst b/site/source/index.rst index ed870ca..7c0feb1 100644 --- a/site/source/index.rst +++ b/site/source/index.rst @@ -25,6 +25,10 @@ to developers wanting to perform those tasks programmatically. Moreover, with ex hand, advanced users can train their own synchronisers with a single command and zero setup. A handful of subtitle formats are supported and can be converted from one to another either during synchronisation and translation or on on-demand. +Even without any subtitles available beforehand, Subaligner provides transcription by utilising SOTA Large Language +models. This pipeline, combined with translation, can generate near ready-to-use subtitles of increasingly higher quality in +various languages and formats which cater to your preferences, thanks to those models continually advancing over time. 
+ Subligner supports the following subtitle formats: SubRip, TTML, WebVTT, (Advanced) SubStation Alpha, MicroDVD, MPL2, TMP, EBU STL, SAMI, SCC and SBV. The source code can be found on GitHub: `subaligner `_. diff --git a/site/source/installation.rst b/site/source/installation.rst index c9672ba..1568f23 100644 --- a/site/source/installation.rst +++ b/site/source/installation.rst @@ -19,7 +19,7 @@ Installation **Install dependencies for enabling translation**:: - $ pip install 'subaligner[translation]' + $ pip install 'subaligner[llm]' **Pre-install additional dependencies before installing subaligner[stretch] or subaligner[dev]**:: diff --git a/site/source/usage.rst b/site/source/usage.rst index 93688c8..0ecfbea 100644 --- a/site/source/usage.rst +++ b/site/source/usage.rst @@ -7,6 +7,9 @@ lower latency and shifts all subtitle segments globally. The latter way has high segments individually with an option of stretching each segment. Multilingual translation on subtitles can be achieved together with the alignment in one go or separately (see in :ref:`Advanced Usage`). +With no subtitles in your hand beforehand, Subligner's transcribe mode utilises Large Language Models (LLMs) to transcribe +audiovisual content and generates subtitles in various formats which suit your needs. + Make sure you have got the virtual environment activated upfront. **Single-stage alignment (high-level shift with lower latency)**:: @@ -26,6 +29,10 @@ Make sure you have got the virtual environment activated upfront. (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt (.venv) $ subaligner -m dual -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt +**Generate subtitles by transcribing audiovisual files**:: + + (.venv) $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt + **Alignment on segmented plain texts (double newlines as the delimiter)**:: (.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt @@ -44,14 +51,11 @@ Make sure you have got the virtual environment activated upfront. 
**Translative alignment with the ISO 639-3 language code pair (src,tgt)**:: - (.venv) $ subaligner_1pass --languages - (.venv) $ subaligner_1pass -v video.mp4 -s subtitle.srt -t src,tgt - (.venv) $ subaligner_2pass --languages - (.venv) $ subaligner_2pass -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner --languages (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt + (.venv) $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt **Shift subtitle manually by offset in seconds**:: diff --git a/subaligner/__main__.py b/subaligner/__main__.py index 774fbbb..4afa02f 100755 --- a/subaligner/__main__.py +++ b/subaligner/__main__.py @@ -1,13 +1,17 @@ #!/usr/bin/env python """ -usage: subaligner [-h] [-m {single,dual,script,shift}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so] +usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so] [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-lgs] [-d] [-q] [-ver] + [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] + [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver] Subaligner command line interface optional arguments: -h, --help show this help message and exit + -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...] 
+ File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS Max global log loss for alignment -so, --stretch_on Switch on stretch on subtitles) @@ -23,18 +27,22 @@ Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho) -os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS Offset by which the subtitle will be shifted + -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho} + Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes] + -mr {whisper}, --llm_recipe {whisper} + LLM recipe used for transcribing video files + -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large} + Flavour variation for a specific LLM recipe -lgs, --languages Print out language codes used for stretch and translation -d, --debug Print out debugging information -q, --quiet Switch off logging information -ver, --version show program's version number and exit required arguments: - -m {single,dual,script,shift}, --mode {single,dual,script,shift} - Alignment mode: either single or dual + -m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe} + Alignment mode: single, dual, script, shift or transcribe -v VIDEO_PATH, --video_path VIDEO_PATH File path or URL to the video file - -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...] 
- File path or URL to the subtitle file (Extensions of supported subtitles: .sami, .ssa, .vtt, .xml, .sub, .smi, .ass, .srt, .tmp, .dfxp, .stl, .ttml, .sbv, .txt, .ytt, .scc) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) """ import argparse @@ -61,10 +69,10 @@ def main(): required_args.add_argument( "-m", "--mode", - type=str, + type=str.lower, default="", - choices=["single", "dual", "script", "shift"], - help="Alignment mode: either single or dual", + choices=["single", "dual", "script", "shift", "transcribe"], + help="Alignment mode: single, dual, script, shift or transcribe", ) required_args.add_argument( "-v", @@ -74,7 +82,7 @@ def main(): help="File path or URL to the video file", ) from subaligner.subtitle import Subtitle - required_args.add_argument( + parser.add_argument( "-s", "--subtitle_path", type=str, @@ -100,7 +108,7 @@ def main(): parser.add_argument( "-sil", "--stretch_in_language", - type=str, + type=str.lower, choices=Utils.get_stretch_language_codes(), default="eng", help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present", @@ -137,6 +145,29 @@ def main(): type=float, help="Offset by which the subtitle will be shifted" ) + parser.add_argument( + "-ml", + "--main_language", + type=str.lower, + choices=Utils.get_stretch_language_codes(), + help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]", + ) + parser.add_argument( + "-mr", + "--llm_recipe", + type=str.lower, + default="whisper", + choices=["whisper"], + help="LLM recipe used for transcribing video files" + ) + parser.add_argument( + "-mf", + "--llm_flavour", + type=str.lower, + default="small", + choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"], + help="Flavour variation for a specific LLM recipe" + ) parser.add_argument("-lgs", "--languages", action="store_true", help="Print out language codes used for stretch and translation") parser.add_argument("-d", "--debug", action="store_true", @@ -153,33 +184,45 @@ def main(): print("ERROR: --mode was not passed in") parser.print_usage() sys.exit(21) + FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths] - if not FLAGS.subtitle_path: + if not FLAGS.subtitle_path and FLAGS.mode != "transcribe": print("ERROR: --subtitle_path was not passed in") parser.print_usage() sys.exit(21) - if FLAGS.mode != "shift": + elif FLAGS.mode == "transcribe": + FLAGS.subtitle_path = ["{}.srt".format(tempfile.mkstemp()[1])] + if FLAGS.mode in ["single", "dual", "script", "transcribe"]: for subtitle_path in FLAGS.subtitle_path: if FLAGS.video_path == "": print("ERROR: --video_path was not passed in") parser.print_usage() sys.exit(21) if subtitle_path.lower().startswith("http") and FLAGS.output == "": - print("ERROR: --output was not passed in for alignment on a remote subtitle file") + print("ERROR: --output was not passed in but required by alignment on a remote subtitle file") parser.print_usage() sys.exit(21) if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "": - print("ERROR: --output was not passed in for alignment on embedded subtitles") + print("ERROR: --output was not passed in but required by alignment on embedded subtitles") parser.print_usage() sys.exit(21) if FLAGS.mode == "script" and FLAGS.output == "": - print("ERROR: 
--output was not passed in for alignment on plain texts") + print("ERROR: --output was not passed in but required by alignment on plain texts") parser.print_usage() sys.exit(21) - if FLAGS.translate is not None: + if FLAGS.mode == "transcribe": + if FLAGS.output == "": + print("ERROR: --output was not passed in but required by mode 'transcribe'") + parser.print_usage() + sys.exit(21) + if FLAGS.main_language is None: + print("ERROR: --main_language was not passed in but required by mode 'transcribe'") + parser.print_usage() + sys.exit(21) + if FLAGS.translate is not None or FLAGS.mode == "transcribe": if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.') + print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.') sys.exit(21) if FLAGS.stretch_on or FLAGS.mode == "script": if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}: @@ -190,13 +233,13 @@ def main(): local_subtitle_path = subtitle_path exit_segfail = FLAGS.exit_segfail stretch = FLAGS.stretch_on - stretch_in_lang = FLAGS.stretch_in_language + stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language from subaligner.logger import Logger Logger.VERBOSE = FLAGS.debug Logger.QUIET = FLAGS.quiet from subaligner.predictor import Predictor - from subaligner.exception import UnsupportedFormatException + from subaligner.exception import UnsupportedFormatException, TranscriptionException from subaligner.exception import TerminalException try: @@ -230,6 +273,7 @@ def main(): parser.print_usage() sys.exit(21) + voice_probabilities = None predictor = Predictor() if FLAGS.mode == "single": aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass( @@ -252,6 +296,11 @@ def main(): subtitle_file_path=local_subtitle_path, stretch_in_lang=stretch_in_lang, ) + elif FLAGS.mode == "transcribe": + from subaligner.transcriber import Transcriber + transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour) + subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang) + aligned_subs = subtitle.subs else: print("ERROR: Unknown mode {}".format(FLAGS.mode)) parser.print_usage() @@ -267,6 +316,9 @@ def main(): aligned_subs = translator.translate(aligned_subs) Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate, "utf-8") + elif FLAGS.mode == "transcribe": + Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, + frame_rate, "utf-8") else: Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate) @@ -277,35 +329,35 @@ def main(): print( "ERROR: Alignment failed with a too high loss value: {}".format(log_loss) ) - _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path) + _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode) sys.exit(22) print("Aligned subtitle saved to: {}".format(aligned_subtitle_path)) - except UnsupportedFormatException as e: + except (UnsupportedFormatException, TranscriptionException) as e: print( "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "") ) traceback.print_tb(e.__traceback__) - _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, 
local_subtitle_path) + _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode) sys.exit(23) except TerminalException as e: print( "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "") ) traceback.print_tb(e.__traceback__) - _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path) + _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode) sys.exit(24) except Exception as e: print( "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "") ) traceback.print_tb(e.__traceback__) - _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path) + _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode) sys.exit(1) else: - _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path) + _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode) sys.exit(0) - else: + elif FLAGS.mode == "shift": if FLAGS.offset_seconds is None: print("ERROR: --offset_seconds was not passed in during subtitle shifting") sys.exit(21) @@ -319,11 +371,13 @@ def main(): sys.exit(0) -def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path): +def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode): if video_path.lower().startswith("http") and os.path.exists(local_video_path): os.remove(local_video_path) if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path): os.remove(local_subtitle_path) + if mode == "transcribe" and os.path.exists(local_subtitle_path): + os.remove(local_subtitle_path) if __name__ == "__main__": diff --git a/subaligner/exception.py b/subaligner/exception.py index 7e0acc3..cb35725 100644 --- a/subaligner/exception.py +++ b/subaligner/exception.py @@ -8,3 +8,7 @@ class TerminalException(Exception): class NoFrameRateException(Exception): """ An exception raised due to frame rate not found.""" + + +class TranscriptionException(Exception): + """ An exception raised due to transcription failures.""" diff --git a/subaligner/predictor.py b/subaligner/predictor.py index ab69177..bede1d2 100644 --- a/subaligner/predictor.py +++ b/subaligner/predictor.py @@ -37,7 +37,7 @@ class Predictor(metaclass=Singleton): __SEGMENT_PREDICTION_TIMEOUT = 60 # Maximum waiting time in seconds when predicting each segment __THREAD_QUEUE_SIZE = 8 - __THREAD_NUMBER = 4 + __THREAD_NUMBER = 1 # Do not change def __init__(self, **kwargs) -> None: """Feature predictor initialiser. diff --git a/subaligner/subaligner_1pass/__main__.py b/subaligner/subaligner_1pass/__main__.py index c485c77..73465a0 100755 --- a/subaligner/subaligner_1pass/__main__.py +++ b/subaligner/subaligner_1pass/__main__.py @@ -120,7 +120,7 @@ def main(): sys.exit(21) if FLAGS.translate is not None: if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.') + print('ERROR: Alignment has been configured to perform translation. 
Please install "subaligner[llm]" and run your command again.') sys.exit(21) local_video_path = FLAGS.video_path diff --git a/subaligner/subaligner_2pass/__main__.py b/subaligner/subaligner_2pass/__main__.py index dd21e2f..b2c5ffa 100755 --- a/subaligner/subaligner_2pass/__main__.py +++ b/subaligner/subaligner_2pass/__main__.py @@ -147,7 +147,7 @@ def main(): sys.exit(21) if FLAGS.translate is not None: if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.') + print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.') sys.exit(21) if FLAGS.stretch_on: if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}: diff --git a/subaligner/subaligner_batch/__main__.py b/subaligner/subaligner_batch/__main__.py index 255630a..002688e 100755 --- a/subaligner/subaligner_batch/__main__.py +++ b/subaligner/subaligner_batch/__main__.py @@ -173,7 +173,7 @@ def main(): sys.exit(21) if FLAGS.translate is not None: if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.') + print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.') sys.exit(21) video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in diff --git a/subaligner/subaligner_convert/__main__.py b/subaligner/subaligner_convert/__main__.py index 68b7220..ee521a4 100755 --- a/subaligner/subaligner_convert/__main__.py +++ b/subaligner/subaligner_convert/__main__.py @@ -99,7 +99,7 @@ def main(): sys.exit(21) if FLAGS.translate is not None: if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.') + print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.') sys.exit(21) local_subtitle_path = FLAGS.input_subtitle_path diff --git a/subaligner/subtitle.py b/subaligner/subtitle.py index 81541c7..105641e 100644 --- a/subaligner/subtitle.py +++ b/subaligner/subtitle.py @@ -59,6 +59,8 @@ def __init__(self, secret: object, subtitle_file_path: str, subtitle_format: str if subtitle_format == "subrip": self.__subs = self.__load_subrip(subtitle_file_path) + elif subtitle_format == "subrip_raw": + self.__subs = pysrt.SubRipFile().from_string(subtitle_file_path) elif subtitle_format == "ttml": self.__subs = self.__convert_ttml_to_subs(subtitle_file_path) elif subtitle_format == "webvtt": @@ -105,6 +107,19 @@ def load_subrip(cls, subtitle_file_path: str) -> "Subtitle": return cls(cls.__secret, subtitle_file_path, "subrip") + @classmethod + def load_subrip_str(cls, subrip_raw: str) -> "Subtitle": + """Load a SubRip subtitle string. + + Arguments: + subrip_str {string} -- The string representation of the SubRip content. + + Returns: + Subtitle -- Subtitle object. + """ + + return cls(cls.__secret, subrip_raw, "subrip_raw") + @classmethod def load_ttml(cls, subtitle_file_path: str) -> "Subtitle": """Load a TTML subtitle file. 
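The `load_subrip_str` helper added above is what the new transcriber (introduced next in this patch) uses to turn its generated SubRip text into a `Subtitle` object. A minimal sketch with a made-up cue:

```python
from subaligner.subtitle import Subtitle

# load_subrip_str parses raw SubRip content rather than a file path.
srt_str = "1\n00:00:00,000 --> 00:00:02,000\nHello there\n\n"
subtitle = Subtitle.load_subrip_str(srt_str)
print(len(subtitle.subs))  # expected: 1
```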
diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py new file mode 100644 index 0000000..3dc2b77 --- /dev/null +++ b/subaligner/transcriber.py @@ -0,0 +1,118 @@ +import os +import whisper +from enum import Enum +from typing import Tuple, Optional +from pysrt import SubRipTime +from whisper.tokenizer import LANGUAGES +from .translator import Translator +from .subtitle import Subtitle +from .media_helper import MediaHelper +from .logger import Logger +from .exception import NoFrameRateException, TranscriptionException + + +class Transcriber(object): + """Transcribe audiovisual content for subtitle generation. + """ + + def __init__(self, recipe: str = "whisper", flavour: str = "small") -> None: + """Initialiser for the transcribing process. + + Arguments: + recipe {string} -- the LLM recipe used for transcribing video files (default: "whisper"). + flavour {string} -- the flavour variation for a specific LLM recipe (default: "small"). + Raises: + NotImplementedError -- Thrown when the LLM recipe is unknown. + """ + if recipe not in [r.value for r in Recipe]: + raise NotImplementedError(f"Unknown recipe: {recipe}") + if recipe == Recipe.whisper.value: + if flavour not in [f.value for f in WhisperFlavour]: + raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}") + self.__model = whisper.load_model(flavour) + self.recipe = recipe + self.flavour = flavour + self.__media_helper = MediaHelper() + self.__LOGGER = Logger().get_logger(__name__) + + def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle, Optional[float]]: + """Transcribe an audiovisual file and generate subtitles. + + Arguments: + video_file_path {string} -- The input video file path. + language_code {string} -- An alpha 3 language code derived from ISO 639-3. + Raises: + TranscriptionException -- Thrown when transcription is failed. + NotImplementedError -- Thrown when the LLM recipe is not supported. 
+ """ + if self.recipe == "whisper": + lang = Translator.get_iso_639_alpha_2(language_code) + if lang not in LANGUAGES: + raise TranscriptionException(f'"{language_code}" is not supported by {self.recipe} ({self.flavour})') + audio_file_path = self.__media_helper.extract_audio(video_file_path, True, 16000) + try: + audio = whisper.load_audio(audio_file_path) + self.__LOGGER.debug("Start transcribing the audio...") + result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang]) + self.__LOGGER.info("Finished transcribing the audio") + srt_str = "" + for i, segment in enumerate(result["segments"], start=1): + srt_str += f"{i}\n" \ + f"{self.__format_timestamp(segment['start'])} --> {self.__format_timestamp(segment['end'])}\n" \ + f"{segment['text'].strip().replace('-->', '->')}\n" \ + "\n" + subtitle = Subtitle.load_subrip_str(srt_str) + subtitle, frame_rate = self.__on_frame_timecodes(subtitle, video_file_path) + self.__LOGGER.debug("Generated the raw subtitle") + return subtitle, frame_rate + finally: + if os.path.exists(audio_file_path): + os.remove(audio_file_path) + else: + raise NotImplementedError(f"{self.recipe} ({self.flavour}) is not supported") + + @staticmethod + def __format_timestamp(seconds: float) -> str: + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + hours_marker = f"{hours:02d}:" + return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + + def __on_frame_timecodes(self, subtitle: Subtitle, video_file_path: str) -> Tuple[Subtitle, Optional[float]]: + frame_rate = None + try: + frame_rate = self.__media_helper.get_frame_rate(video_file_path) + frame_duration = 1.0 / frame_rate + for sub in subtitle.subs: + start_seconds = sub.start.hours * 3600 + sub.start.minutes * 60 + sub.start.seconds + sub.start.milliseconds / 1000.0 + end_seconds = sub.end.hours * 3600 + sub.end.minutes * 60 + sub.end.seconds + sub.end.milliseconds / 1000.0 + start_frames = int(start_seconds / frame_duration) + end_frames = int(end_seconds / frame_duration) + sub.start = SubRipTime(seconds=start_frames * frame_duration) + sub.end = SubRipTime(seconds=end_frames * frame_duration) + except NoFrameRateException: + self.__LOGGER.warning("Cannot detect the frame rate for %s" % video_file_path) + return subtitle, frame_rate + + +class Recipe(str, Enum): + whisper = "whisper" + + +class WhisperFlavour(str, Enum): + tiny = "tiny" + tiny_en = "tiny.en" + small = "small" + medium = "medium" + medium_en = "medium.en" + base = "base" + base_en = "base.en" + large_v1 = "large-v1" + large_v2 = "large-v2" + large = "large" diff --git a/tests/integration/feature/subaligner.feature b/tests/integration/feature/subaligner.feature index 2858b79..d68f56f 100644 --- a/tests/integration/feature/subaligner.feature +++ b/tests/integration/feature/subaligner.feature @@ -234,6 +234,17 @@ Feature: Subaligner CLI | subaligner_1pass | | "test.srt" | eng,fra | "test_aligned.srt" | | subaligner_2pass | | "test.srt" | eng,deu | "test_aligned.srt" | + @transcription + Scenario Outline: Test transcription on audiovisual input and subtitle generation + Given I have a video file + And I have a subtitle file + When I run the alignment with on them with stage with language, recipe and flavour + Then a new subtitle file is generated + Examples: + 
| video-in | aligner | mode | subtitle-in | language | recipe | flavour | subtitle-out | + | "test.mp4" | subaligner | transcribe | "test.srt" | eng | whisper | tiny | "test_aligned.srt" | + | "test.wav" | subaligner | transcribe | "test.srt" | eng | whisper | tiny | "test_aligned.srt" | + @batch Scenario Outline: Test batch alignment Given I have an audiovisual file directory "av" diff --git a/tests/integration/radish/step.py b/tests/integration/radish/step.py index 9da52cb..ddc52c9 100644 --- a/tests/integration/radish/step.py +++ b/tests/integration/radish/step.py @@ -105,6 +105,20 @@ def run_subaligner_with_translation(step, aligner, mode, language_pair): step.context.exit_code = process.wait(timeout=WAIT_TIMEOUT_IN_SECONDS) +@when('I run the alignment with {aligner:S} on them with {mode:S} stage with {language:S} language, {recipe:S} recipe and {flavour:S} flavour') +def run_subaligner_with_transcription(step, aligner, mode, language, recipe, flavour): + process = subprocess.Popen([ + os.path.join(PWD, "..", "..", "..", "bin", aligner), + "-m", mode, + "-v", step.context.video_file_path, + "-ml", language, + "-mr", recipe, + "-mf", flavour, + "-o", os.path.join(PWD, "..", "..", "subaligner", "resource", "test_aligned.srt"), + "-q"], shell=False) + step.context.exit_code = process.wait(timeout=WAIT_TIMEOUT_IN_SECONDS) + + @when('I run the alignment with {aligner:S} on them with {mode:S} stage and output "{file_name:S}"') def run_subaligner_with_output(step, aligner, mode, file_name): if mode == "": diff --git a/tests/subaligner/test_transcriber.py b/tests/subaligner/test_transcriber.py new file mode 100644 index 0000000..95d2de7 --- /dev/null +++ b/tests/subaligner/test_transcriber.py @@ -0,0 +1,43 @@ +import os +import unittest +from subaligner.transcriber import Transcriber as Undertest +from subaligner.exception import TranscriptionException + + +class TranscriberTest(unittest.TestCase): + + def setUp(self) -> None: + self.video_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resource", "test.mp4") + self.undertest = Undertest(recipe="whisper", flavour="tiny") + + def test_transcribe(self): + subtitle, frame_rate = self.undertest.transcribe(self.video_file_path, "eng") + assert len(subtitle.subs) > 0 + assert frame_rate == 24 + + def test_throw_exception_on_unknown_recipe(self): + try: + Undertest(recipe="unknown") + except Exception as e: + self.assertTrue(isinstance(e, NotImplementedError)) + self.assertEqual(str(e), "Unknown recipe: unknown") + else: + self.fail("Should have thrown exception") + + def test_throw_exception_on_unknown_flavour(self): + try: + Undertest(recipe="whisper", flavour="unknown") + except Exception as e: + self.assertTrue(isinstance(e, NotImplementedError)) + self.assertEqual(str(e), "Unknown whisper flavour: unknown") + else: + self.fail("Should have thrown exception") + + def test_throw_exception_on_unsupported_language(self): + try: + self.undertest.transcribe(self.video_file_path, "abc") + except Exception as e: + self.assertTrue(isinstance(e, TranscriptionException)) + self.assertEqual(str(e), '"abc" is not supported by whisper (tiny)') + else: + self.fail("Should have thrown exception") From 3eed835e9481f1fa0a05be7b03c16607dd6e8543 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Mar 2023 19:11:04 +0000 Subject: [PATCH 06/20] bump up the version for the new release --- Makefile | 2 +- subaligner/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4c414da..c756287 100644 
--- a/Makefile +++ b/Makefile @@ -149,7 +149,7 @@ manual: clean-manual ## generate manual pages test-dist: if [ ! -e ".$(PYTHON)" ]; then ~/.pyenv/versions/$(PYTHON)/bin/python3 -m venv .$(PYTHON); fi .$(PYTHON)/bin/pip install --upgrade pip setuptools wheel; \ - .$(PYTHON)/bin/pip install -e . --use-feature=2020-resolver; \ + .$(PYTHON)/bin/pip install -e . dist: clean-dist test-dist cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ diff --git a/subaligner/_version.py b/subaligner/_version.py index 0f6fa4c..98c74dc 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.2.5" +__version__ = "0.3.0" From 197495fe528a7b276b6ad8af5319e368f490aaf0 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Mar 2023 19:52:11 +0000 Subject: [PATCH 07/20] retire ubuntu-18 image --- .github/workflows/dockerhub.yml | 13 ------------- README.md | 3 ++- docker/Dockerfile-Ubuntu18 | 19 ------------------- docker/Dockerfile-Ubuntu20 | 1 - site/source/index.rst | 2 +- site/source/usage.rst | 3 ++- 6 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 docker/Dockerfile-Ubuntu18 diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index 7a41c85..c4c6ccd 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -60,19 +60,6 @@ jobs: tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.u20 push: true - - name: Build and push the Ubuntu 18 image - id: docker_build_u18 - uses: docker/build-push-action@v2 - with: - context: ./docker - file: "./docker/Dockerfile-Ubuntu18" - build-args: | - "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" - allow: network.host - github-token: ${{ github.token }} - tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.u18 - push: true - - name: Build and push the Fedora 31 image id: docker_build_fed31 uses: docker/build-push-action@v2 diff --git a/README.md b/README.md index 2ed868a..6d0f51d 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ $ subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/sub ``` # Generate subtitles by transcribing audiovisual files $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt +$ subaligner -m transcribe -v video.mp4 -ml zho -mr whisper -mf medium -o subtitle_aligned.srt ``` ``` # Alignment on segmented plain texts (double newlines as the delimiter) @@ -145,7 +146,7 @@ $ subaligner --languages $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt -$ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt +$ subaligner -m transcribe -v video.mp4 -ml src -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt ``` ``` # Shift subtitle manually by offset in seconds diff --git a/docker/Dockerfile-Ubuntu18 b/docker/Dockerfile-Ubuntu18 deleted file mode 100644 index 997b85b..0000000 --- a/docker/Dockerfile-Ubuntu18 +++ /dev/null @@ -1,19 +0,0 @@ -# Subaligner Ubuntu 18 Docker Image -FROM ubuntu:18.04 - -ARG RELEASE_VERSION - -ENV DEBIAN_FRONTEND=noninteractive -ENV RELEASE_VERSION=${RELEASE_VERSION} -ENV TZ=Europe/London - -RUN ["/bin/bash", "-c", "apt-get -y update &&\ - apt-get -y install ffmpeg &&\ - apt-get -y install espeak libespeak1 libespeak-dev espeak-data &&\ - apt-get -y install libsndfile-dev &&\ - apt-get -y 
install python3-dev &&\ - apt-get -y install python3-tk &&\ - apt-get -y install python3-pip &&\ - python3 -m pip install --upgrade pip &&\ - python3 -m pip install \"subaligner==${RELEASE_VERSION}\" &&\ - python3 -m pip install \"subaligner[harmony]==${RELEASE_VERSION}\""] diff --git a/docker/Dockerfile-Ubuntu20 b/docker/Dockerfile-Ubuntu20 index b2a64ac..297bb4e 100644 --- a/docker/Dockerfile-Ubuntu20 +++ b/docker/Dockerfile-Ubuntu20 @@ -7,7 +7,6 @@ ENV RELEASE_VERSION=${RELEASE_VERSION} ENV DEBIAN_FRONTEND=noninteractive ENV TZ=Europe/London -RUN echo "$RELEASE_VERSION" RUN ["/bin/bash", "-c", "apt-get -y update &&\ apt-get -y install ffmpeg &&\ apt-get -y install espeak libespeak1 libespeak-dev espeak-data &&\ diff --git a/site/source/index.rst b/site/source/index.rst index 7c0feb1..6d34def 100644 --- a/site/source/index.rst +++ b/site/source/index.rst @@ -26,7 +26,7 @@ hand, advanced users can train their own synchronisers with a single command and and can be converted from one to another either during synchronisation and translation or on on-demand. Even without any subtitles available beforehand, Subaligner provides transcription by utilising SOTA Large Language -models. This pipeline, combined with translation, can generate near ready-to-use subtitles of increasingly higher quality in +Models (LLMs). This pipeline, combined with translation, can generate near ready-to-use subtitles of increasingly higher quality in various languages and formats which cater to your preferences, thanks to those models continually advancing over time. Subligner supports the following subtitle formats: SubRip, TTML, WebVTT, (Advanced) SubStation Alpha, MicroDVD, MPL2, TMP, diff --git a/site/source/usage.rst b/site/source/usage.rst index 0ecfbea..b0cbf5f 100644 --- a/site/source/usage.rst +++ b/site/source/usage.rst @@ -32,6 +32,7 @@ Make sure you have got the virtual environment activated upfront. **Generate subtitles by transcribing audiovisual files**:: (.venv) $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt + (.venv) $ subaligner -m transcribe -v video.mp4 -ml zho -mr whisper -mf medium -o subtitle_aligned.srt **Alignment on segmented plain texts (double newlines as the delimiter)**:: @@ -55,7 +56,7 @@ Make sure you have got the virtual environment activated upfront. 
(.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt - (.venv) $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt + (.venv) $ subaligner -m transcribe -v video.mp4 -ml src -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt **Shift subtitle manually by offset in seconds**:: From f05f4d7a0f25cc205b14cf6086b0c472a4f26c62 Mon Sep 17 00:00:00 2001 From: baxtree Date: Tue, 14 Mar 2023 09:23:06 +0000 Subject: [PATCH 08/20] post relase tidy up --- CITATION.cff | 1 - README.md | 58 ++------ site/source/advanced_usage.rst | 2 - site/source/installation.rst | 8 +- site/source/usage.rst | 25 +--- subaligner/__init__.py | 5 + subaligner/__main__.py | 44 ++++-- subaligner/_version.py | 2 +- subaligner/exception.py | 4 + subaligner/llm.py | 29 ++++ subaligner/transcriber.py | 54 ++----- subaligner/translator.py | 215 ++++++++++++--------------- subaligner/utils.py | 36 +++++ tests/subaligner/test_transcriber.py | 5 +- tests/subaligner/test_translator.py | 5 - tests/subaligner/test_utils.py | 14 ++ 16 files changed, 256 insertions(+), 251 deletions(-) create mode 100644 subaligner/llm.py diff --git a/CITATION.cff b/CITATION.cff index 2fda752..2eb84f4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,7 +5,6 @@ authors: given-names: Xi orcid: https://orcid.org/0000-0002-2177-8458 title: "Subaligner: Towards Automated Subtitle Alignment" -version: 0.2.1 doi: 10.5281/zenodo.5603083 date-released: 2021-10-28 url: "https://github.com/baxtree/subaligner" \ No newline at end of file diff --git a/README.md b/README.md index 6d0f51d..9713282 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,12 @@ $ brew install ffmpeg $ pip install -U pip $ pip install subaligner ``` +or install from source: +``` +$ git clone git@github.com:baxtree/subaligner.git +$ cd subaligner +$ python setup.py install +``` ## Installation with Optional Packages Supporting Additional Features ``` @@ -61,31 +67,10 @@ To install all supported features: $ pip install 'subaligner[harmony]' ``` -## Alternative Installations -``` -# Install via pipx -$ pip install -U pip pipx -$ pipx install subaligner -``` -or -``` -# Install from GitHub via Pipenv -$ pipenv install subaligner -$ pipenv install 'subaligner[stretch]' -$ pipenv install 'subaligner[dev]' -``` -or -``` -# Install from source +## Container Support +If you prefer using a containerised environment over installing everything locally, run: -$ git clone git@github.com:baxtree/subaligner.git -$ cd subaligner -$ python setup.py install -``` -or ``` -# Use dockerised installation - $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner bash ``` For users on Windows 10: [Docker Desktop](https://docs.docker.com/docker-for-windows/install/) is the only option at present. 
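Besides the CLI and container workflows documented above, this patch also reworks the programmatic surface: the new `subaligner/llm.py` enums and the updated `Transcriber` and `Translator` constructors that appear later in this diff. Below is a minimal sketch (not part of the patch itself) of how those pieces could be combined from Python; it only uses constructors and methods visible in this patch series, and the video path and language codes are placeholder values.

```python
from subaligner.llm import TranscriptionRecipe, TranslationRecipe, WhisperFlavour
from subaligner.transcriber import Transcriber
from subaligner.translator import Translator

video_path = "video.mp4"  # placeholder input

# Generate a subtitle from the audio track, mirroring tests/subaligner/test_transcriber.py.
transcriber = Transcriber(recipe=TranscriptionRecipe.WHISPER.value, flavour=WhisperFlavour.TINY.value)
subtitle, frame_rate = transcriber.transcribe(video_path, "eng")

# Translate the generated cues, mirroring the call added to subaligner/__main__.py.
translator = Translator(src_language="eng", tgt_language="fra",
                        recipe=TranslationRecipe.HELSINKI_NLP.value)
translated_subs = translator.translate(subtitle.subs, video_path)
print(f"{len(translated_subs)} cue(s) translated at {frame_rate} fps")
```

Note that `translate` only needs the video path when the whisper translation recipe is selected (it re-listens to the audio); with the default helsinki-nlp recipe the argument is ignored.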
@@ -99,22 +84,13 @@ docker run -v "/d/media":/media -w "/media" -it baxtree/subaligner bash ``` # Single-stage alignment (high-level shift with lower latency) -$ subaligner_1pass -v video.mp4 -s subtitle.srt -$ subaligner_1pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt +$ subaligner -m single -v video.mp4 -s subtitle.srt +$ subaligner -m single -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt ``` ``` # Dual-stage alignment (low-level shift with higher latency) -$ subaligner_2pass -v video.mp4 -s subtitle.srt -$ subaligner_2pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt -``` -or -``` -# Pass in single-stage or dual-stage as the alignment mode - -$ subaligner -m single -v video.mp4 -s subtitle.srt $ subaligner -m dual -v video.mp4 -s subtitle.srt -$ subaligner -m single -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt $ subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt ``` ``` @@ -142,6 +118,7 @@ $ subaligner -m dual -v video.mkv -s embedded:stream_index=0 -o subtitle_aligned ``` ``` # Translative alignment with the ISO 639-3 language code pair (src,tgt) + $ subaligner --languages $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt @@ -171,20 +148,17 @@ $ pipx run subaligner -m dual -v video.mp4 -s subtitle.srt # Run the module as a script $ python -m subaligner -m single -v video.mp4 -s subtitle.srt $ python -m subaligner -m dual -v video.mp4 -s subtitle.srt -$ python -m subaligner.subaligner_1pass -v video.mp4 -s subtitle.srt -$ python -m subaligner.subaligner_2pass -v video.mp4 -s subtitle.srt ``` ``` # Run alignments with the docker image $ docker pull baxtree/subaligner -$ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_1pass -v video.mp4 -s subtitle.srt -$ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_2pass -v video.mp4 -s subtitle.srt -$ docker run -it baxtree/subaligner subaligner_1pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt -$ docker run -it baxtree/subaligner subaligner_2pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt +$ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner -m single -v video.mp4 -s subtitle.srt +$ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner -m dual -v video.mp4 -s subtitle.srt +$ docker run -it baxtree/subaligner subaligner -m single -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt +$ docker run -it baxtree/subaligner subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt ``` -The aligned subtitle will be saved at `subtitle_aligned.srt`. For details on CLI, run `subaligner_1pass -h`, `subaligner_2pass -h` or `subaligner -h`. -Additional utilities can be used after consulting `subaligner_batch -h`, `subaligner_convert -h`, `subaligner_train -h` and `subaligner_tune -h`. +The aligned subtitle will be saved at `subtitle_aligned.srt`. For details on CLIs, run `subaligner -h` or `subaligner_batch -h`, `subaligner_convert -h`, `subaligner_train -h` and `subaligner_tune -h` for additional utilities. 
`subaligner_1pass` and `subaligner_2pass` are shortcuts for running `subaligner` with `-m single` and `-m dual` options, respectively. ![](figures/screencast.gif) diff --git a/site/source/advanced_usage.rst b/site/source/advanced_usage.rst index 33c8716..952ed59 100644 --- a/site/source/advanced_usage.rst +++ b/site/source/advanced_usage.rst @@ -64,8 +64,6 @@ is present, make sure the folder passed in is empty. (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt -tod training_output_directory (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -tod training_output_directory - (.venv) $ subaligner_1pass -v video.mp4 -s subtitle.srt -tod training_output_directory - (.venv) $ subaligner_2pass -v video.mp4 -s subtitle.srt -tod training_output_directory To apply your trained model to subtitle alignment, pass in the training_output_directory containing training results as shown above with `-tod` or `--training_output_directory`. diff --git a/site/source/installation.rst b/site/source/installation.rst index 1568f23..8e8da43 100644 --- a/site/source/installation.rst +++ b/site/source/installation.rst @@ -53,11 +53,11 @@ Installation $ pipenv install 'subaligner[stretch]' $ pipenv install 'subaligner[dev]' -**Use dockerised installation**:: +**Container Support**:: $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner bash -The following builds are available on dockerhub for several Linux distributions: CentOS 7 (latest and VERSION.el7), CentOS 8 (VERSION.el8), Ubuntu 18 (VERSION.u18), Ubuntu 20 (VERSION.u20), Debian 10 (VERSION.deb10), Fedora 31 (VERSION.fed31) and ArchLinux (VERSION.arch). +Users may prefer using a containerised environment over installing everything locally. The following builds are available on dockerhub for several Linux distributions: CentOS 7 (latest and VERSION.el7), CentOS 8 (VERSION.el8), Ubuntu 18 (VERSION.u18), Ubuntu 20 (VERSION.u20), Debian 10 (VERSION.deb10), Fedora 31 (VERSION.fed31) and ArchLinux (VERSION.arch). You can also download the latest release on `GitHub `_ and follow the steps down below @@ -72,8 +72,8 @@ to create a virtual environment and set up all the dependencies: **Subaligner CLI should be on your PATH now**:: (.venv) $ subaligner --help - (.venv) $ subaligner_1pass --help - (.venv) $ subaligner_2pass --help + (.venv) $ subaligner_1pass --help # shortcut for "subaligner -m single" + (.venv) $ subaligner_2pass --help # shortcut for "subaligner -m dual" (.venv) $ subaligner_batch --help (.venv) $ subaligner_convert --help (.venv) $ subaligner_train --help diff --git a/site/source/usage.rst b/site/source/usage.rst index b0cbf5f..b67ab88 100644 --- a/site/source/usage.rst +++ b/site/source/usage.rst @@ -14,18 +14,11 @@ Make sure you have got the virtual environment activated upfront. 
**Single-stage alignment (high-level shift with lower latency)**:: - (.venv) $ subaligner_1pass -v video.mp4 -s subtitle.srt - (.venv) $ subaligner_1pass -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt + (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt + (.venv) $ subaligner -m single -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt **Dual-stage alignment (low-level shift with higher latency)**:: - (.venv) $ subaligner_2pass -v video.mp4 -s subtitle.srt - (.venv) $ subaligner_2pass -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt - -**Pass in single-stage or dual-stage as the alignment mode**:: - - (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt - (.venv) $ subaligner -m single -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt (.venv) $ subaligner -m dual -v https://example.org/video.mp4 -s https://example.org/subtitle.srt -o subtitle_aligned.srt @@ -72,10 +65,10 @@ Make sure you have got the virtual environment activated upfront. **Run alignments with the docker image**:: $ docker pull baxtree/subaligner - $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_1pass -v video.mp4 -s subtitle.srt - $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_2pass -v video.mp4 -s subtitle.srt - $ docker run -it baxtree/subaligner subaligner_1pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt - $ docker run -it baxtree/subaligner subaligner_2pass -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt + $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner -m single -v video.mp4 -s subtitle.srt + $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner -m dual -v video.mp4 -s subtitle.srt + $ docker run -it baxtree/subaligner subaligner -m single -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt + $ docker run -it baxtree/subaligner subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt **Run alignments with pipx**:: @@ -86,22 +79,16 @@ Make sure you have got the virtual environment activated upfront. $ python -m subaligner -m single -v video.mp4 -s subtitle.srt $ python -m subaligner -m dual -v video.mp4 -s subtitle.srt - $ python -m subaligner.subaligner_1pass -v video.mp4 -s subtitle.srt - $ python -m subaligner.subaligner_2pass -v video.mp4 -s subtitle.srt Currently the stretching is experimental and make sure subaligner[stretch] is installed before switching it on with `-so` or `--stretch_on` as shown below. 
**Switch on stretching when aligning subtitles**:: - (.venv) $ subaligner_2pass -v video.mp4 -s subtitle.srt -so - or (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -so **Save the aligned subtitle to a specific location**:: - (.venv) $ subaligner_2pass -v video.mp4 -s subtitle.srt -o /path/to/the/output/subtitle.srt - or (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -o /path/to/the/output/subtitle.srt **On Windows**:: diff --git a/subaligner/__init__.py b/subaligner/__init__.py index e3206b2..ae79fcb 100644 --- a/subaligner/__init__.py +++ b/subaligner/__init__.py @@ -1,7 +1,12 @@ import os +import warnings import multiprocessing as mp from ._version import __version__ __all__ = ["__version__"] + +warnings.filterwarnings("ignore") +warnings.simplefilter("ignore") + mp.set_start_method("spawn", force=True) os.environ["KMP_WARNINGS"] = "0" diff --git a/subaligner/__main__.py b/subaligner/__main__.py index 4afa02f..63cc178 100755 --- a/subaligner/__main__.py +++ b/subaligner/__main__.py @@ -4,14 +4,15 @@ [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-tr {helsinki-nlp,whisper}] [-tf TRANSLATION_FLAVOUR] [-lgs] + [-d] [-q] [-ver] Subaligner command line interface optional arguments: -h, --help show this help message and exit -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...] 
- File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) + File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .sub, .ytt, .smi, .sami, .tmp, .txt, .ssa, .vtt, .stl, .xml, .ass, .scc, .dfxp, .sbv, .srt) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS Max global log loss for alignment -so, --stretch_on Switch on stretch on subtitles) @@ -32,7 +33,11 @@ -mr {whisper}, --llm_recipe {whisper} LLM recipe used for transcribing video files -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large} - Flavour variation for a specific LLM recipe + Flavour variation for a specific LLM recipe supporting transcription + -tr {helsinki-nlp,whisper}, --translation_recipe {helsinki-nlp,whisper} + LLM recipe used for translating subtitles + -tf TRANSLATION_FLAVOUR, --translation_flavour TRANSLATION_FLAVOUR + Flavour variation for a specific LLM recipe supporting translation -lgs, --languages Print out language codes used for stretch and translation -d, --debug Print out debugging information -q, --quiet Switch off logging information @@ -152,21 +157,40 @@ def main(): choices=Utils.get_stretch_language_codes(), help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]", ) + from subaligner.llm import TranscriptionRecipe + from subaligner.llm import WhisperFlavour parser.add_argument( "-mr", "--llm_recipe", type=str.lower, - default="whisper", - choices=["whisper"], + default=TranscriptionRecipe.WHISPER.value, + choices=[r.value for r in TranscriptionRecipe], help="LLM recipe used for transcribing video files" ) parser.add_argument( "-mf", "--llm_flavour", type=str.lower, - default="small", - choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"], - help="Flavour variation for a specific LLM recipe" + default=WhisperFlavour.SMALL.value, + choices=[wf.value for wf in WhisperFlavour], + help="Flavour variation for a specific LLM recipe supporting transcription" + ) + from subaligner.llm import TranslationRecipe + from subaligner.llm import HelsinkiNLPFlavour + parser.add_argument( + "-tr", + "--translation_recipe", + type=str.lower, + default=TranslationRecipe.HELSINKI_NLP.value, + choices=[r.value for r in TranslationRecipe], + help="LLM recipe used for translating subtitles" + ) + parser.add_argument( + "-tf", + "--translation_flavour", + type=str.lower, + default=None, + help="Flavour variation for a specific LLM recipe supporting translation" ) parser.add_argument("-lgs", "--languages", action="store_true", help="Print out language codes used for stretch and translation") @@ -312,8 +336,8 @@ def main(): if FLAGS.translate is not None: from subaligner.translator import Translator source, target = FLAGS.translate.split(",") - translator = Translator(source, target) - aligned_subs = translator.translate(aligned_subs) + translator = Translator(src_language=source, tgt_language=target, recipe=FLAGS.translation_recipe, flavour=FLAGS.translation_flavour) + aligned_subs = translator.translate(aligned_subs, local_video_path) 
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate, "utf-8") elif FLAGS.mode == "transcribe": diff --git a/subaligner/_version.py b/subaligner/_version.py index 98c74dc..7945cf2 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/subaligner/exception.py b/subaligner/exception.py index cb35725..c686b4d 100644 --- a/subaligner/exception.py +++ b/subaligner/exception.py @@ -10,5 +10,9 @@ class NoFrameRateException(Exception): """ An exception raised due to frame rate not found.""" +class TranslationException(Exception): + """ An exception raised due to translation failures.""" + + class TranscriptionException(Exception): """ An exception raised due to transcription failures.""" diff --git a/subaligner/llm.py b/subaligner/llm.py new file mode 100644 index 0000000..7d7aecf --- /dev/null +++ b/subaligner/llm.py @@ -0,0 +1,29 @@ +from enum import Enum + + +class TranscriptionRecipe(Enum): + WHISPER = "whisper" + + +class TranslationRecipe(Enum): + HELSINKI_NLP = "helsinki-nlp" + WHISPER = "whisper" + + +class WhisperFlavour(Enum): + TINY = "tiny" + TINY_EN = "tiny.en" + SMALL = "small" + MEDIUM = "medium" + MEDIUM_EN = "medium.en" + BASE = "base" + BASE_EN = "base.en" + LARGE_V1 = "large-v1" + LARGE_V2 = "large-v2" + LARGE = "large" + + +class HelsinkiNLPFlavour(Enum): + OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}" + OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}" + OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}" diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py index 3dc2b77..0a0481a 100644 --- a/subaligner/transcriber.py +++ b/subaligner/transcriber.py @@ -1,13 +1,15 @@ import os import whisper -from enum import Enum from typing import Tuple, Optional from pysrt import SubRipTime from whisper.tokenizer import LANGUAGES from .translator import Translator from .subtitle import Subtitle from .media_helper import MediaHelper +from .llm import TranscriptionRecipe, WhisperFlavour +from .singleton import Singleton from .logger import Logger +from .utils import Utils from .exception import NoFrameRateException, TranscriptionException @@ -15,7 +17,7 @@ class Transcriber(object): """Transcribe audiovisual content for subtitle generation. """ - def __init__(self, recipe: str = "whisper", flavour: str = "small") -> None: + def __init__(self, recipe: str = TranscriptionRecipe.WHISPER.value, flavour: str = WhisperFlavour.SMALL.value) -> None: """Initialiser for the transcribing process. Arguments: @@ -24,14 +26,14 @@ def __init__(self, recipe: str = "whisper", flavour: str = "small") -> None: Raises: NotImplementedError -- Thrown when the LLM recipe is unknown. """ - if recipe not in [r.value for r in Recipe]: + if recipe not in [r.value for r in TranscriptionRecipe]: raise NotImplementedError(f"Unknown recipe: {recipe}") - if recipe == Recipe.whisper.value: + if recipe == TranscriptionRecipe.WHISPER.value: if flavour not in [f.value for f in WhisperFlavour]: raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}") self.__model = whisper.load_model(flavour) - self.recipe = recipe - self.flavour = flavour + self.__recipe = recipe + self.__flavour = flavour self.__media_helper = MediaHelper() self.__LOGGER = Logger().get_logger(__name__) @@ -45,10 +47,10 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle TranscriptionException -- Thrown when transcription is failed. 
NotImplementedError -- Thrown when the LLM recipe is not supported. """ - if self.recipe == "whisper": - lang = Translator.get_iso_639_alpha_2(language_code) + if self.__recipe == "whisper": + lang = Utils.get_iso_639_alpha_2(language_code) if lang not in LANGUAGES: - raise TranscriptionException(f'"{language_code}" is not supported by {self.recipe} ({self.flavour})') + raise TranscriptionException(f'"{language_code}" is not supported by {self.__recipe} ({self.__flavour})') audio_file_path = self.__media_helper.extract_audio(video_file_path, True, 16000) try: audio = whisper.load_audio(audio_file_path) @@ -58,7 +60,7 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle srt_str = "" for i, segment in enumerate(result["segments"], start=1): srt_str += f"{i}\n" \ - f"{self.__format_timestamp(segment['start'])} --> {self.__format_timestamp(segment['end'])}\n" \ + f"{Utils.format_timestamp(segment['start'])} --> {Utils.format_timestamp(segment['end'])}\n" \ f"{segment['text'].strip().replace('-->', '->')}\n" \ "\n" subtitle = Subtitle.load_subrip_str(srt_str) @@ -69,20 +71,7 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle if os.path.exists(audio_file_path): os.remove(audio_file_path) else: - raise NotImplementedError(f"{self.recipe} ({self.flavour}) is not supported") - - @staticmethod - def __format_timestamp(seconds: float) -> str: - assert seconds >= 0, "non-negative timestamp expected" - milliseconds = round(seconds * 1000.0) - hours = milliseconds // 3_600_000 - milliseconds -= hours * 3_600_000 - minutes = milliseconds // 60_000 - milliseconds -= minutes * 60_000 - seconds = milliseconds // 1_000 - milliseconds -= seconds * 1_000 - hours_marker = f"{hours:02d}:" - return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + raise NotImplementedError(f"{self.__recipe} ({self.__flavour}) is not supported") def __on_frame_timecodes(self, subtitle: Subtitle, video_file_path: str) -> Tuple[Subtitle, Optional[float]]: frame_rate = None @@ -99,20 +88,3 @@ def __on_frame_timecodes(self, subtitle: Subtitle, video_file_path: str) -> Tupl except NoFrameRateException: self.__LOGGER.warning("Cannot detect the frame rate for %s" % video_file_path) return subtitle, frame_rate - - -class Recipe(str, Enum): - whisper = "whisper" - - -class WhisperFlavour(str, Enum): - tiny = "tiny" - tiny_en = "tiny.en" - small = "small" - medium = "medium" - medium_en = "medium.en" - base = "base" - base_en = "base.en" - large_v1 = "large-v1" - large_v2 = "large-v2" - large = "large" diff --git a/subaligner/translator.py b/subaligner/translator.py index 28277fa..723183e 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -1,23 +1,25 @@ import math -import pycountry import time +import whisper from copy import deepcopy +from typing import List, Generator, Optional from pysrt import SubRipItem from tqdm import tqdm from transformers import MarianMTModel, MarianTokenizer -from typing import List, Generator +from whisper.tokenizer import LANGUAGES from .singleton import Singleton +from .llm import TranslationRecipe, HelsinkiNLPFlavour, WhisperFlavour +from .utils import Utils +from .subtitle import Subtitle from .logger import Logger +from .exception import TranslationException -class Translator(metaclass=Singleton): +class Translator(object): """Translate subtitles. 
""" __TENSOR_TYPE = "pt" - __OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}" - __OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}" - __OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}" __TRANSLATING_BATCH_SIZE = 10 __LANGUAGE_CODE_MAPPER = { "bos": "zls", @@ -46,41 +48,29 @@ class Translator(metaclass=Singleton): "jpn-eng": "jap-eng" } - def __init__(self, src_language, tgt_language) -> None: + def __init__(self, + src_language: str, + tgt_language: str, + recipe: str = TranslationRecipe.HELSINKI_NLP.value, + flavour: Optional[str] = None) -> None: """Initialiser for the subtitle translation. Arguments: src_language {string} -- The source language code derived from ISO 639-3. tgt_language {string} -- The target language code derived from ISO 639-3. + recipe {string} -- the LLM recipe used for transcribing video files (default: "helsinki-nlp"). + flavour {string} -- the flavour variation for a specific LLM recipe (default: None). Raises: NotImplementedError -- Thrown when the model of the specified language pair is not found. """ self.__LOGGER = Logger().get_logger(__name__) - self.__initialise_model(src_language, tgt_language) - - @staticmethod - def get_iso_639_alpha_2(language_code: str) -> str: - """Find the alpha 2 language code based on an alpha 3 one. - - Arguments: - language_code {string} -- An alpha 3 language code derived from ISO 639-3. - - Returns: - string -- The alpha 2 language code if exists otherwise the alpha 3 one. - - Raises: - ValueError -- Thrown when the input language code cannot be recognised. - """ - - lang = pycountry.languages.get(alpha_3=language_code) - if lang is None: - return language_code - elif hasattr(lang, "alpha_2"): - return lang.alpha_2 - else: - return lang.alpha_3 + if recipe not in [r.value for r in TranslationRecipe]: + raise NotImplementedError(f"Unknown recipe: {recipe}") + self.__recipe = recipe + self.__tgt_language = tgt_language + self.__initialise_model(src_language, tgt_language, recipe, flavour) @staticmethod def normalise_single(language_code: str) -> str: @@ -112,129 +102,106 @@ def normalise_pair(src_language: str, tgt_language: str) -> List[str]: else: return [src_language, tgt_language] - def translate(self, subs: List[SubRipItem]) -> List[SubRipItem]: + def translate(self, subs: List[SubRipItem], video_file_path: Optional[str] = None) -> List[SubRipItem]: """Translate a list of subtitle cues. Arguments: subs {list} -- A list of SubRipItems. + video_file_path {string} -- The input video file path (default: None).. Returns: {list} -- A list of new SubRipItems holding the translation results. """ - translated_texts = [] - self.lang_model.eval() - new_subs = deepcopy(subs) - src_texts = [sub.text for sub in new_subs] - num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE) - self.__LOGGER.info("Translating %s subtitle cue(s)..." 
% len(src_texts)) - for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches): - input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) - translated = self.lang_model.generate(**input_ids) - translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated]) - for index in range(len(new_subs)): - new_subs[index].text = translated_texts[index] - self.__LOGGER.info("Subtitle translated") - return new_subs - - def __initialise_model(self, src_lang: str, tgt_lang: str) -> None: - src_lang = Translator.normalise_single(src_lang) - tgt_lang = Translator.normalise_single(tgt_lang) - src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang) - - if self.__download_mt_model(src_lang, tgt_lang): - return - elif self.__download_mt_tc_big_model(src_lang, tgt_lang): - return - elif self.__download_tatoeba_model(src_lang, tgt_lang): - return + if self.__recipe == TranslationRecipe.HELSINKI_NLP.value: + translated_texts = [] + self.lang_model.eval() + new_subs = deepcopy(subs) + src_texts = [sub.text for sub in new_subs] + num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE) + self.__LOGGER.info("Translating %s subtitle cue(s)..." % len(src_texts)) + for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches): + input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) + translated = self.lang_model.generate(**input_ids) + translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated]) + for index in range(len(new_subs)): + new_subs[index].text = translated_texts[index] + self.__LOGGER.info("Subtitle translated") + return new_subs + elif self.__recipe == TranslationRecipe.WHISPER.value: + assert video_file_path is not None + lang = Utils.get_iso_639_alpha_2(self.__tgt_language) + if lang not in LANGUAGES: + raise TranslationException(f'"{self.__tgt_language}" is not supported by {self.__recipe}') + audio = whisper.load_audio(video_file_path) + self.__LOGGER.debug("Start translating the audio...") + result = self.lang_model.transcribe(audio, task="translate", language=LANGUAGES[lang]) + self.__LOGGER.info("Finished translating the audio") + srt_str = "" + for i, segment in enumerate(result["segments"], start=1): + srt_str += f"{i}\n" \ + f"{Utils.format_timestamp(segment['start'])} --> {Utils.format_timestamp(segment['end'])}\n" \ + f"{segment['text'].strip().replace('-->', '->')}\n" \ + "\n" + subtitle = Subtitle.load_subrip_str(srt_str) + return subtitle.subs else: - message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang) - self.__LOGGER.error(message) - raise NotImplementedError(message) - - def __download_mt_model(self, src_lang: str, tgt_lang: str) -> bool: + return [] + + def __initialise_model(self, src_lang: str, tgt_lang: str, recipe: str, flavour: Optional[str]) -> None: + if recipe == TranslationRecipe.HELSINKI_NLP.value: + src_lang = Translator.normalise_single(src_lang) + tgt_lang = Translator.normalise_single(tgt_lang) + src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang) + + if self.__download_mt_model(src_lang, tgt_lang, HelsinkiNLPFlavour.OPUS_MT.value): + return + elif self.__download_mt_model(src_lang, tgt_lang, HelsinkiNLPFlavour.OPUS_TATOEBA.value): + return + elif self.__download_mt_model(src_lang, tgt_lang, HelsinkiNLPFlavour.OPUS_MT_TC_BIG.value): 
+ return + else: + message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang) + self.__LOGGER.error(message) + raise NotImplementedError(message) + elif recipe == TranslationRecipe.WHISPER.value: + if flavour in [f.value for f in WhisperFlavour]: + # self.__download_whisper_model(flavour) + self.__download_whisper_model("medium") # works for translation target other than English + else: + raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}") + + def __download_mt_model(self, src_lang: str, tgt_lang: str, flavour: str) -> bool: try: - mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_model_name) + mt_model_name = flavour.format(Utils.get_iso_639_alpha_2(src_lang), Utils.get_iso_639_alpha_2(tgt_lang)) + self.__download_by_mt_name(mt_model_name) return True except OSError: self.__log_and_back_off(mt_model_name) try: - mt_model_name = Translator.__OPUS_MT.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_model_name) + mt_model_name = flavour.format(src_lang, Utils.get_iso_639_alpha_2(tgt_lang)) + self.__download_by_mt_name(mt_model_name) return True except OSError: self.__log_and_back_off(mt_model_name) try: - mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) - self.__download(mt_model_name) + mt_model_name = flavour.format(Utils.get_iso_639_alpha_2(src_lang), tgt_lang) + self.__download_by_mt_name(mt_model_name) return True except OSError: self.__log_and_back_off(mt_model_name) try: - mt_model_name = Translator.__OPUS_MT.format(src_lang, tgt_lang) - self.__download(mt_model_name) + mt_model_name = flavour.format(src_lang, tgt_lang) + self.__download_by_mt_name(mt_model_name) return True except OSError: self.__log_and_back_off(mt_model_name) return False - def __download_mt_tc_big_model(self, src_lang: str, tgt_lang: str) -> bool: - try: - mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_tc_model_name) - return True - except OSError: - self.__log_and_back_off(mt_tc_model_name) - try: - mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_tc_model_name) - return True - except OSError: - self.__log_and_back_off(mt_tc_model_name) - try: - mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) - self.__download(mt_tc_model_name) - return True - except OSError: - self.__log_and_back_off(mt_tc_model_name) - try: - mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, tgt_lang) - self.__download(mt_tc_model_name) - return True - except OSError: - self.__log_and_back_off(mt_tc_model_name) - return False - - def __download_tatoeba_model(self, src_lang: str, tgt_lang: str) -> bool: - try: - mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_model_name) - return True - except OSError: - self.__log_and_back_off(mt_model_name) - try: - mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang)) - self.__download(mt_model_name) - return True - except OSError: - self.__log_and_back_off(mt_model_name) - try: - mt_model_name = 
Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang) - self.__download(mt_model_name) - return True - except OSError: - self.__log_and_back_off(mt_model_name) - try: - mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, tgt_lang) - self.__download(mt_model_name) - return True - except OSError: - self.__log_and_back_off(mt_model_name) - return False + def __download_whisper_model(self, flavour: str) -> None: + self.lang_model = whisper.load_model(flavour) - def __download(self, mt_model_name: str) -> None: + def __download_by_mt_name(self, mt_model_name: str) -> None: self.__LOGGER.debug("Trying to download the MT model %s" % mt_model_name) self.tokenizer = MarianTokenizer.from_pretrained(mt_model_name) self.lang_model = MarianMTModel.from_pretrained(mt_model_name) diff --git a/subaligner/utils.py b/subaligner/utils.py index 29aec66..9375b16 100644 --- a/subaligner/utils.py +++ b/subaligner/utils.py @@ -5,6 +5,7 @@ import shutil import cchardet import shlex +import pycountry from pycaption import ( CaptionConverter, @@ -652,6 +653,41 @@ def get_language_table() -> List[str]: 'sem', 'sit', 'sla', 'srn', 'ssp', 'swc', 'taw', 'tdt', 'tiv', 'tll', 'toi', 'tpi', 'trk', 'tum', 'tut', 'tvl', 'tzo', 'umb', 'urj', 'vsl', 'wal', 'war', 'wls', 'yap', 'yua', 'zai', 'zle', 'zls', 'zlw', 'zne'] + @staticmethod + def get_iso_639_alpha_2(language_code: str) -> str: + """Find the alpha 2 language code based on an alpha 3 one. + + Arguments: + language_code {string} -- An alpha 3 language code derived from ISO 639-3. + + Returns: + string -- The alpha 2 language code if exists otherwise the alpha 3 one. + + Raises: + ValueError -- Thrown when the input language code cannot be recognised. + """ + + lang = pycountry.languages.get(alpha_3=language_code) + if lang is None: + return language_code + elif hasattr(lang, "alpha_2"): + return lang.alpha_2 + else: + return lang.alpha_3 + + @staticmethod + def format_timestamp(seconds: float) -> str: + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + hours_marker = f"{hours:02d}:" + return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + @staticmethod def __convert_subtitle(source_file_path: str, source_ext: str, target_file_path: Optional[str], target_ext: str, format: str, frame_rate: Optional[float] = None) -> Tuple[str, str]: encoding = Utils.detect_encoding(source_file_path) diff --git a/tests/subaligner/test_transcriber.py b/tests/subaligner/test_transcriber.py index 95d2de7..94bd124 100644 --- a/tests/subaligner/test_transcriber.py +++ b/tests/subaligner/test_transcriber.py @@ -1,5 +1,6 @@ import os import unittest +from subaligner.llm import TranscriptionRecipe, WhisperFlavour from subaligner.transcriber import Transcriber as Undertest from subaligner.exception import TranscriptionException @@ -8,7 +9,7 @@ class TranscriberTest(unittest.TestCase): def setUp(self) -> None: self.video_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resource", "test.mp4") - self.undertest = Undertest(recipe="whisper", flavour="tiny") + self.undertest = Undertest(recipe=TranscriptionRecipe.WHISPER.value, flavour=WhisperFlavour.TINY.value) def test_transcribe(self): subtitle, frame_rate = self.undertest.transcribe(self.video_file_path, "eng") @@ -26,7 
+27,7 @@ def test_throw_exception_on_unknown_recipe(self): def test_throw_exception_on_unknown_flavour(self): try: - Undertest(recipe="whisper", flavour="unknown") + Undertest(recipe=TranscriptionRecipe.WHISPER.value, flavour="unknown") except Exception as e: self.assertTrue(isinstance(e, NotImplementedError)) self.assertEqual(str(e), "Unknown whisper flavour: unknown") diff --git a/tests/subaligner/test_translator.py b/tests/subaligner/test_translator.py index 562ee97..a116f2f 100644 --- a/tests/subaligner/test_translator.py +++ b/tests/subaligner/test_translator.py @@ -13,11 +13,6 @@ def setUp(self): os.path.dirname(os.path.abspath(__file__)), "resource/test.srt" ) - def test_get_iso_639_alpha_2(self): - self.assertEqual("en", Undertest.get_iso_639_alpha_2("eng")) - self.assertEqual("ada", Undertest.get_iso_639_alpha_2("ada")) - self.assertEqual("xyz", Undertest.get_iso_639_alpha_2("xyz")) - @patch("transformers.MarianMTModel.from_pretrained") @patch("transformers.MarianTokenizer.from_pretrained") def test_translate(self, tokenizer_from_pretrained, model_from_pretrained): diff --git a/tests/subaligner/test_utils.py b/tests/subaligner/test_utils.py index 7b1f2a0..7581765 100644 --- a/tests/subaligner/test_utils.py +++ b/tests/subaligner/test_utils.py @@ -298,6 +298,20 @@ def test_get_misc_language_codes(self): def test_get_language_table(self): self.assertEqual(200, len(Undertest.get_language_table())) + def test_get_iso_639_alpha_2(self): + self.assertEqual("en", Undertest.get_iso_639_alpha_2("eng")) + self.assertEqual("ada", Undertest.get_iso_639_alpha_2("ada")) + self.assertEqual("xyz", Undertest.get_iso_639_alpha_2("xyz")) + + def test_format_timestamp(self): + test_cases = [ + (0, "00:00:00,000"), + (100, "00:01:40,000"), + (100.1, "00:01:40,100"), + ] + for seconds, time_code in test_cases: + self.assertEqual(time_code, Undertest.format_timestamp(seconds)) + @patch("subprocess.Popen.communicate", return_value=1) def test_throw_exception_on_srt2vtt_with_error_code(self, mock_communicate): self._assert_exception_on_subproces(lambda: Undertest.srt2vtt(self.real_srt_path, "output"), mock_communicate) From 307a325cb172082fd0da13a5b6985d88db806475 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 17 Mar 2023 09:38:34 +0000 Subject: [PATCH 09/20] issue-78 escape quotes in names of files fed into ffmpeg --- subaligner/media_helper.py | 20 +++++++++++--------- subaligner/utils.py | 14 +++++++++----- tests/subaligner/test_utils.py | 6 +++++- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/subaligner/media_helper.py b/subaligner/media_helper.py index cd91e54..97f669a 100644 --- a/subaligner/media_helper.py +++ b/subaligner/media_helper.py @@ -16,6 +16,7 @@ from .exception import TerminalException from .exception import NoFrameRateException from .logger import Logger +from .utils import Utils TEMP_DIR_PATH = tempfile.mkdtemp() @@ -73,14 +74,15 @@ def extract_audio(self, video_file_path, decompress: bool = False, freq: int = 1 ) command = ( - "{0} -y -xerror -i '{1}' -ac 2 -ar {2} -vn '{3}'".format( - self.FFMPEG_BIN, video_file_path, freq, audio_file_path + "{0} -y -xerror -i {1} -ac 2 -ar {2} -vn {3}".format( + self.FFMPEG_BIN, Utils.double_quoted(video_file_path), freq, Utils.double_quoted(audio_file_path) ) if decompress - else "{0} -y -xerror -i '{1}' -vn -acodec copy '{2}'".format( - self.FFMPEG_BIN, video_file_path, audio_file_path + else "{0} -y -xerror -i {1} -vn -acodec copy {2}".format( + self.FFMPEG_BIN, Utils.double_quoted(video_file_path), 
Utils.double_quoted(audio_file_path) ) ) + print(command) with subprocess.Popen( shlex.split(command), shell=False, @@ -182,12 +184,12 @@ def extract_audio_from_start_to_end(self, audio_file_path: str, start: str, end: if end is not None: duration = self.get_duration_in_seconds(start, end) - command = "{0} -y -xerror -i '{1}' -ss {2} -t {3} -acodec copy '{4}'".format( - self.FFMPEG_BIN, audio_file_path, start, duration, segment_path + command = "{0} -y -xerror -i {1} -ss {2} -t {3} -acodec copy {4}".format( + self.FFMPEG_BIN, Utils.double_quoted(audio_file_path), start, duration, Utils.double_quoted(segment_path) ) else: - command = "{0} -y -xerror -i '{1}' -ss {2} -acodec copy '{3}'".format( - self.FFMPEG_BIN, audio_file_path, start, segment_path + command = "{0} -y -xerror -i {1} -ss {2} -acodec copy {3}".format( + self.FFMPEG_BIN, Utils.double_quoted(audio_file_path), start, Utils.double_quoted(segment_path) ) with subprocess.Popen( shlex.split(command), @@ -316,7 +318,7 @@ def get_frame_rate(self, file_path: str) -> float: discarded = "NUL:" if os.name == "nt" else "/dev/null" with subprocess.Popen( - shlex.split("{0} -i '{1}' -t 00:00:10 -f null {2}".format(self.FFMPEG_BIN, file_path, discarded)), + shlex.split("{0} -i {1} -t 00:00:10 -f null {2}".format(self.FFMPEG_BIN, Utils.double_quoted(file_path), discarded)), shell=False, stderr=subprocess.PIPE, close_fds=True, diff --git a/subaligner/utils.py b/subaligner/utils.py index 9375b16..fa50d0f 100644 --- a/subaligner/utils.py +++ b/subaligner/utils.py @@ -88,7 +88,7 @@ def srt2vtt(srt_file_path: str, vtt_file_path: Optional[str] = None, timeout_sec _vtt_file_path = srt_file_path.replace(".srt", ".vtt") if vtt_file_path is None else vtt_file_path encoding = Utils.detect_encoding(srt_file_path) - command = "{0} -y -sub_charenc {1} -i '{2}' -f webvtt '{3}'".format(Utils.FFMPEG_BIN, encoding, srt_file_path, _vtt_file_path) + command = "{0} -y -sub_charenc {1} -i {2} -f webvtt {3}".format(Utils.FFMPEG_BIN, encoding, Utils.double_quoted(srt_file_path), Utils.double_quoted(_vtt_file_path)) timeout_msg = "Timeout on converting SubRip to WebVTT: {}".format(srt_file_path) error_msg = "Cannot convert SubRip to WebVTT: {}".format(srt_file_path) @@ -115,7 +115,7 @@ def vtt2srt(vtt_file_path: str, srt_file_path: Optional[str] = None, timeout_sec _srt_file_path = vtt_file_path.replace(".vtt", ".srt") if srt_file_path is None else srt_file_path encoding = Utils.detect_encoding(vtt_file_path) - command = "{0} -y -sub_charenc {1} -i '{2}' -f srt '{3}'".format(Utils.FFMPEG_BIN, encoding, vtt_file_path, _srt_file_path) + command = "{0} -y -sub_charenc {1} -i {2} -f srt {3}".format(Utils.FFMPEG_BIN, encoding, Utils.double_quoted(vtt_file_path), Utils.double_quoted(_srt_file_path)) timeout_msg = "Timeout on converting WebVTT to SubRip: {}".format(vtt_file_path) error_msg = "Cannot convert WebVTT to SubRip: {}".format(vtt_file_path) @@ -492,7 +492,7 @@ def extract_teletext_as_subtitle(ts_file_path: str, page_num: int, output_file_p timeout_secs {int} -- The timeout in seconds on extraction {default: 30}. 
""" - command = "{0} -y -fix_sub_duration -txt_page {1} -txt_format text -i '{2}' '{3}'".format(Utils.FFMPEG_BIN, page_num, ts_file_path, output_file_path) + command = "{0} -y -fix_sub_duration -txt_page {1} -txt_format text -i {2} {3}".format(Utils.FFMPEG_BIN, page_num, Utils.double_quoted(ts_file_path), Utils.double_quoted(output_file_path)) timeout_msg = "Timeout on extracting Teletext from transport stream: {} on page: {}".format(ts_file_path, page_num) error_msg = "Cannot extract Teletext from transport stream: {} on page: {}".format(ts_file_path, page_num) @@ -518,7 +518,7 @@ def extract_matroska_subtitle(mkv_file_path: str, stream_index: int, output_file timeout_secs {int} -- The timeout in seconds on extraction {default: 30}. """ - command = "{0} -y -i '{1}' -map 0:s:{2} '{3}'".format(Utils.FFMPEG_BIN, mkv_file_path, stream_index, output_file_path) + command = "{0} -y -i {1} -map 0:s:{2} {3}".format(Utils.FFMPEG_BIN, Utils.double_quoted(mkv_file_path), stream_index, Utils.double_quoted(output_file_path)) timeout_msg = "Timeout on extracting the subtitle from file: {} with stream index: {}".format(mkv_file_path, stream_index) error_msg = "Cannot extract the subtitle from file: {} with stream index: {}".format(mkv_file_path, stream_index) @@ -570,7 +570,7 @@ def contains_embedded_subtitles(video_file_path: str, timeout_secs: int = 30) -> bool -- True if the video contains embedded subtitles or False otherwise. """ - command = "{0} -y -i '{1}' -c copy -map 0:s -f null - -v 0 -hide_banner".format(Utils.FFMPEG_BIN, video_file_path) + command = "{0} -y -i {1} -c copy -map 0:s -f null - -v 0 -hide_banner".format(Utils.FFMPEG_BIN, Utils.double_quoted(video_file_path)) timeout_msg = "Timeout on detecting embedded subtitles from file: {}".format(video_file_path) error_msg = "Embedded subtitle detection failed for file: {}".format(video_file_path) @@ -688,6 +688,10 @@ def format_timestamp(seconds: float) -> str: hours_marker = f"{hours:02d}:" return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + @staticmethod + def double_quoted(s: str) -> str: + return "\"{}\"".format(s.replace('"', "\\\"")) + @staticmethod def __convert_subtitle(source_file_path: str, source_ext: str, target_file_path: Optional[str], target_ext: str, format: str, frame_rate: Optional[float] = None) -> Tuple[str, str]: encoding = Utils.detect_encoding(source_file_path) diff --git a/tests/subaligner/test_utils.py b/tests/subaligner/test_utils.py index 7581765..da6689c 100644 --- a/tests/subaligner/test_utils.py +++ b/tests/subaligner/test_utils.py @@ -245,7 +245,7 @@ def test_ytt2srt(self): def test_extract_teletext_as_srt(self, mocked_run_command): Undertest.extract_teletext_as_subtitle("ts_file_path", 888, "srt_file_path") - mocked_run_command.assert_called_once_with("ffmpeg -y -fix_sub_duration -txt_page 888 -txt_format text -i {} {}".format("'ts_file_path'", "'srt_file_path'"), ANY, ANY, ANY, ANY) + mocked_run_command.assert_called_once_with("ffmpeg -y -fix_sub_duration -txt_page 888 -txt_format text -i {} {}".format("\"ts_file_path\"", "\"srt_file_path\""), ANY, ANY, ANY, ANY) def test_extract_matroska_subtitle(self): output_file_path = os.path.join(self.resource_tmp, "extracted.matroska.srt") @@ -312,6 +312,10 @@ def test_format_timestamp(self): for seconds, time_code in test_cases: self.assertEqual(time_code, Undertest.format_timestamp(seconds)) + def test_double_quoted(self): + self.assertEqual("\"file'path\"", Undertest.double_quoted("file'path")) + self.assertEqual("\"file\\\"path\"", 
Undertest.double_quoted("file\"path")) + @patch("subprocess.Popen.communicate", return_value=1) def test_throw_exception_on_srt2vtt_with_error_code(self, mock_communicate): self._assert_exception_on_subproces(lambda: Undertest.srt2vtt(self.real_srt_path, "output"), mock_communicate) From 87fd6e3eb932d4c8f0746b09e40ba2176a31b62f Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 20 Mar 2023 09:29:10 +0000 Subject: [PATCH 10/20] support tensorflow 2.11 and deprecate py37 --- .github/workflows/ci-pipeline.yml | 2 +- Pipfile | 9 +++------ README.md | 11 +++++++---- requirements.txt | 10 +++------- site/source/advanced_usage.rst | 28 ++++++++++++++-------------- site/source/installation.rst | 8 ++++---- 6 files changed, 32 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index 169abcc..65bfb67 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -45,7 +45,7 @@ jobs: coverage run -m unittest discover coverage combine coverage xml - bash <(curl -s https://codecov.io/bash) + bash <(curl -s https://codecov.io/bash) -n patch -F 90 - name: Integration tests run: | radish -b tests/integration/radish tests/integration/feature diff --git a/Pipfile b/Pipfile index 934cca0..791b29b 100644 --- a/Pipfile +++ b/Pipfile @@ -22,10 +22,8 @@ sphinx = "==3.3.1" sphinx-rtd-theme = "==0.5.0" [packages] -absl-py = "~=0.10" astor = "==0.7.1" astroid = "~=2.5.6" -audioread = "==2.1.5" beautifulsoup4 = "<4.9.0" bleach = "==3.3.0" cachetools = "==3.1.1" @@ -56,11 +54,10 @@ Keras-Preprocessing = ">=1.0.9" kiwisolver = "==1.0.1" lazy-object-proxy = "==1.4.3" le-pycaption = "==2.2.0a1" -librosa = ">=0.8.0" +librosa = "<0.10.0" locket = "==0.2.0" Markdown = "==2.6.11" mccabe = "==0.6.1" -msgpack-python = "==0.5.6" numba = ">=0.50.0" numpy = "<1.24.0" oauthlib = "==3.1.0" @@ -92,7 +89,7 @@ sentencepiece = "~=0.1.95" setuptools = ">=41.0.0" six = "~=1.15.0" tblib = "==1.3.2" -tensorflow = ">=1.15.5,<2.9" +tensorflow = ">=1.15.5,<2.12" termcolor = "==1.1.0" toml = "==0.10.0" toolz = "==0.9.0" @@ -107,4 +104,4 @@ zipp = "==0.6.0" aeneas = "==1.7.3.0" [requires] -python_version = "3.7" +python_version = "3.8" diff --git a/README.md b/README.md index 9713282..7229920 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml/badge.svg?branch=master)](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml?query=branch%3Amaster) ![Codecov](https://img.shields.io/codecov/c/github/baxtree/subaligner) -[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) +[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Documentation 
Status](https://readthedocs.org/projects/subaligner/badge/?version=latest)](https://subaligner.readthedocs.io/en/latest/?badge=latest) [![GitHub license](https://img.shields.io/github/license/baxtree/subaligner)](https://github.com/baxtree/subaligner/blob/master/LICENSE) [![PyPI](https://badge.fury.io/py/subaligner.svg)](https://badge.fury.io/py/subaligner)
@@ -16,6 +16,8 @@ Subtitle: SubRip, TTML, WebVTT, (Advanced) SubStation Alpha, MicroDVD, MPL2, TMP
 Video/Audio: MP4, WebM, Ogg, 3GP, FLV, MOV, Matroska, MPEG TS, WAV, MP3, AAC, FLAC, etc.
 
+:information_source: Subaligner relies on file extensions as default hints to process a wide range of audiovisual or subtitle formats. It is recommended to use extensions widely accepted by the community to ensure compatibility.
+
 ## Dependencies
 Required by basic: [FFmpeg](https://www.ffmpeg.org/)
 ```
@@ -28,15 +30,16 @@ $ brew install ffmpeg
 
 ## Basic Installation
 ```
-$ pip install -U pip
+$ pip install -U pip && pip install -U setuptools
 $ pip install subaligner
 ```
 or install from source:
 ```
-$ git clone git@github.com:baxtree/subaligner.git
-$ cd subaligner
+$ git clone git@github.com:baxtree/subaligner.git && cd subaligner
+$ pip install -U pip && pip install -U setuptools
 $ python setup.py install
 ```
+:information_source: It is highly recommended to create a virtual environment prior to installation.
 
 ## Installation with Optional Packages Supporting Additional Features
 ```
diff --git a/requirements.txt b/requirements.txt
index 5d2a4a0..92c23bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,4 @@
-absl-py~=0.10
 astor==0.7.1
-audioread==2.1.5
 beautifulsoup4<4.9.0
 bleach==3.3.0
 cachetools==3.1.1
@@ -16,7 +14,6 @@ dask<2022.1.0
 decorator==4.3.0
 distributed==1.13.0
 filelock<4.0.0
-google-auth==1.27.0
 google-auth-oauthlib==0.4.2
 google-pasta~=0.2
 graphviz==0.8.3
@@ -31,12 +28,11 @@ Keras-Preprocessing>=1.0.9
 kiwisolver==1.0.1
 lazy-object-proxy==1.4.3
 le-pycaption==2.2.0a1
-librosa>=0.8.0
+librosa<0.10.0
 locket==0.2.0
 Markdown==2.6.11
 mccabe==0.6.1
 networkx>=2.5.1
-msgpack-python==0.5.6
 numba>=0.50.0
 numpy<1.24.0
 oauthlib==3.1.0
@@ -59,11 +55,11 @@ requests~=2.25.1
 requests-oauthlib==1.3.0
 rsa==4.7
 scipy<=1.8.1
-scikit-learn~=0.24.2
+scikit-learn<1.2.0
 setuptools>=41.0.0
 six~=1.15.0
 tblib==1.3.2
-tensorflow>=1.15.5,<2.9
+tensorflow>=1.15.5,<2.12
 termcolor==1.1.0
 toml==0.10.0
 toolz==0.9.0
diff --git a/site/source/advanced_usage.rst b/site/source/advanced_usage.rst
index 952ed59..c49f424 100644
--- a/site/source/advanced_usage.rst
+++ b/site/source/advanced_usage.rst
@@ -7,7 +7,7 @@ the model can be imported and used for synchronising out-of-sync subtitles.
 
 **Start fresh training**::
 
-    (.venv) $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory
+    $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory
 
 Make sure each subtitle file and its companion audiovisual file are sharing the same base filename, e.g., "awesome.mp4" and "awesome.srt" share the base filename "awesome". Then split them into two separate folders, e.g.,
 the results after training is finished and make sure it is writable to Subaligner.
@@ -17,7 +17,7 @@
 
 **Resume training**::
 
-    (.venv) $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory -e 200 -r
+    $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory -e 200 -r
 
 Training over a large dataset is usually an expensive process and time consuming. 
You can stop the training and resume it with `-r` or `--resume` at another convenient time to enhance an existing model stored in the aforementioned training output @@ -26,14 +26,14 @@ already completed in the past. If the number is forgotten, you can pass in `-dde **Display completed epochs**:: - (.venv) $ subaligner_train -dde -tod training_output_directory + $ subaligner_train -dde -tod training_output_directory Also note that on training resumption, av_directory and subtitle_directory will be ignored due to the reuse of feature embedding by default. **Reuse embeddings**:: - (.venv) $ subaligner_train -utd -tod training_output_directory + $ subaligner_train -utd -tod training_output_directory Embeddings extracted from your media files can be reused with `-utd` or `--use_training_dump`. With that flag on, you can train a new model of another kind (instead of re-using the same model on training resumption) without going through the feature embedding process, @@ -41,7 +41,7 @@ which could take quite long to finish for a large dataset so as to be unnecessar **Ignore sound effects**:: - (.venv) $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory --sound_effect_start_marker "(" --sound_effect_end_marker ")" + $ subaligner_train -vd av_directory -sd subtitle_directory -tod training_output_directory --sound_effect_start_marker "(" --sound_effect_end_marker ")" It is not uncommon that subtitles sometimes contain sound effects (e.g., "BARK", "(applause)" and "[MUSIC]", etc.). For limited training data sets and not sophisticated enough network architectures, the model usually cannot capture all the sound effects very well. @@ -51,7 +51,7 @@ For example, the above exemplary command will treat any strings starting with "( **Train with embedded subtitles**:: - (.venv) $ subaligner_train -vd av_directory -ess embedded:stream_index=0,file_extension=srt -tod training_output_directory + $ subaligner_train -vd av_directory -ess embedded:stream_index=0,file_extension=srt -tod training_output_directory If your audiovisual files all contain embedded subtitles or teletexts of the same format and have been encoded in the same fashion, `-sd` or `--subtitle_directory` can be omitted and subtitles will be extracted based on the specified subtitle selector. For instance, "embedded:stream_index=0,file_extension=srt" @@ -62,8 +62,8 @@ is present, make sure the folder passed in is empty. **Run alignments after training**:: - (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt -tod training_output_directory - (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -tod training_output_directory + $ subaligner -m single -v video.mp4 -s subtitle.srt -tod training_output_directory + $ subaligner -m dual -v video.mp4 -s subtitle.srt -tod training_output_directory To apply your trained model to subtitle alignment, pass in the training_output_directory containing training results as shown above with `-tod` or `--training_output_directory`. @@ -96,7 +96,7 @@ Subaligner tune hyperparameters automatically and the how-to is shown below. **Hyperparameters tuning**:: - (.venv) $ subaligner_tune -vd av_directory -sd subtitle_directory -tod training_output_directory + $ subaligner_tune -vd av_directory -sd subtitle_directory -tod training_output_directory Subaligner has used the `Tree-structured Parzen Estimator Approach (TPE) `_ to automatically run trails on different settings of hyper-parameter values and recommend the best one. 
You can pass in the following @@ -115,17 +115,17 @@ flags to customise the configuration on tuning: **Convert the subtitle to another format**:: - (.venv) $ subaligner_convert -i subtitle.srt -o subtitle.vtt + $ subaligner_convert -i subtitle.srt -o subtitle.vtt **Convert the subtitle to another format and translate**:: - (.venv) $ subaligner_convert --languages - (.venv) $ subaligner_convert -i subtitle_en.srt -o subtitle_zh.vtt -t eng,zho + $ subaligner_convert --languages + $ subaligner_convert -i subtitle_en.srt -o subtitle_zh.vtt -t eng,zho **Translate the subtitle without changing the format**:: - (.venv) $ subaligner_convert --languages - (.venv) $ subaligner_convert -i subtitle_en.srt -o subtitle_es.srt -t eng,spa + $ subaligner_convert --languages + $ subaligner_convert -i subtitle_en.srt -o subtitle_es.srt -t eng,spa For output subtitles like MicroDVD relying on the frame rate, its value needs to be passed in with `-fr` or `--frame_rate`. diff --git a/site/source/installation.rst b/site/source/installation.rst index 8e8da43..e85391f 100644 --- a/site/source/installation.rst +++ b/site/source/installation.rst @@ -14,7 +14,7 @@ Installation **Install Subaligner via PyPI (pre-emptive NumPy)**:: - $ pip install -U pip + $ pip install -U pip && pip install -U setuptools $ pip install subaligner **Install dependencies for enabling translation**:: @@ -65,9 +65,9 @@ to create a virtual environment and set up all the dependencies: **Install Subaligner from source**:: - $ git clone git@github.com:baxtree/subaligner.git - $ cd subaligner - $ make install && source .venv/bin/activate + $ git clone git@github.com:baxtree/subaligner.git && cd subaligner + $ pip install -U pip && pip install -U setuptools + $ python setup.py install **Subaligner CLI should be on your PATH now**:: From 35786e175bb28ae051cca049dcd3adf391aee3e8 Mon Sep 17 00:00:00 2001 From: baxtree Date: Wed, 22 Mar 2023 09:41:15 +0000 Subject: [PATCH 11/20] add fb mbart models for translation --- subaligner/__main__.py | 20 +++---- subaligner/llm.py | 5 ++ subaligner/translator.py | 86 ++++++++++++++++++++++++----- tests/subaligner/test_translator.py | 26 ++++++++- 4 files changed, 111 insertions(+), 26 deletions(-) diff --git a/subaligner/__main__.py b/subaligner/__main__.py index 63cc178..d2a2d6e 100755 --- a/subaligner/__main__.py +++ b/subaligner/__main__.py @@ -4,15 +4,15 @@ [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-tr {helsinki-nlp,whisper}] [-tf TRANSLATION_FLAVOUR] [-lgs] - [-d] [-q] [-ver] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}] + [-tf TRANSLATION_FLAVOUR] [-lgs] [-d] [-q] 
[-ver] Subaligner command line interface optional arguments: -h, --help show this help message and exit -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...] - File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .sub, .ytt, .smi, .sami, .tmp, .txt, .ssa, .vtt, .stl, .xml, .ass, .scc, .dfxp, .sbv, .srt) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) + File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .ssa, .stl, .sbv, .dfxp, .srt, .txt, .ytt, .vtt, .sub, .sami, .xml, .scc, .ass, .smi, .tmp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0) -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS Max global log loss for alignment -so, --stretch_on Switch on stretch on subtitles) @@ -30,11 +30,11 @@ Offset by which the subtitle will be shifted -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho} Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes] - -mr {whisper}, --llm_recipe {whisper} + -mr {whisper}, --transcription_recipe {whisper} LLM recipe used for transcribing video files - -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large} + -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large} Flavour variation for a specific LLM recipe supporting transcription - -tr {helsinki-nlp,whisper}, --translation_recipe {helsinki-nlp,whisper} + -tr {helsinki-nlp,whisper,facebook-mbart}, --translation_recipe {helsinki-nlp,whisper,facebook-mbart} LLM recipe used for translating subtitles -tf TRANSLATION_FLAVOUR, --translation_flavour TRANSLATION_FLAVOUR Flavour variation for a specific LLM recipe supporting translation @@ -161,7 +161,7 @@ def main(): from subaligner.llm import WhisperFlavour parser.add_argument( "-mr", - "--llm_recipe", + "--transcription_recipe", type=str.lower, default=TranscriptionRecipe.WHISPER.value, choices=[r.value for r in TranscriptionRecipe], @@ -169,7 +169,7 @@ def main(): ) parser.add_argument( "-mf", - "--llm_flavour", + "--transcription_flavour", type=str.lower, default=WhisperFlavour.SMALL.value, choices=[wf.value for wf in WhisperFlavour], @@ -322,7 +322,7 @@ def main(): ) elif FLAGS.mode == "transcribe": from subaligner.transcriber import Transcriber - transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour) + transcriber = Transcriber(recipe=FLAGS.transcription_recipe, flavour=FLAGS.transcription_flavour) subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang) aligned_subs = subtitle.subs else: @@ -337,7 
+337,7 @@ def main(): from subaligner.translator import Translator source, target = FLAGS.translate.split(",") translator = Translator(src_language=source, tgt_language=target, recipe=FLAGS.translation_recipe, flavour=FLAGS.translation_flavour) - aligned_subs = translator.translate(aligned_subs, local_video_path) + aligned_subs = translator.translate(aligned_subs, local_video_path, (source, target)) Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate, "utf-8") elif FLAGS.mode == "transcribe": diff --git a/subaligner/llm.py b/subaligner/llm.py index 7d7aecf..3475ac0 100644 --- a/subaligner/llm.py +++ b/subaligner/llm.py @@ -8,6 +8,7 @@ class TranscriptionRecipe(Enum): class TranslationRecipe(Enum): HELSINKI_NLP = "helsinki-nlp" WHISPER = "whisper" + FACEBOOK_MBART = "facebook-mbart" class WhisperFlavour(Enum): @@ -27,3 +28,7 @@ class HelsinkiNLPFlavour(Enum): OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}" OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}" OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}" + + +class FacebookMbartFlavour(Enum): + LARGE = "large" diff --git a/subaligner/translator.py b/subaligner/translator.py index 723183e..d4b66bc 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -2,13 +2,20 @@ import time import whisper from copy import deepcopy -from typing import List, Generator, Optional +from typing import List, Generator, Optional, Tuple from pysrt import SubRipItem from tqdm import tqdm -from transformers import MarianMTModel, MarianTokenizer +from transformers import ( + PreTrainedModel, + PreTrainedTokenizer, + MarianMTModel, + MarianTokenizer, + MBart50TokenizerFast, + MBartForConditionalGeneration, +) from whisper.tokenizer import LANGUAGES from .singleton import Singleton -from .llm import TranslationRecipe, HelsinkiNLPFlavour, WhisperFlavour +from .llm import TranslationRecipe, HelsinkiNLPFlavour, WhisperFlavour, FacebookMbartFlavour from .utils import Utils from .subtitle import Subtitle from .logger import Logger @@ -47,6 +54,16 @@ class Translator(object): "eng-jpn": "eng-jap", "jpn-eng": "jap-eng" } + __MBART_LANGUAGE_CODE_MAPPER = { + "ara": "ar_AR", "ces": "cs_CZ", "deu": "de_DE", "eng": "en_XX", "spa": "es_XX", "est": "et_EE", "fin": "fi_FI", + "fra": "fr_XX", "guj": "gu_IN", "hin": "hi_IN", "ita": "it_IT", "jpn": "ja_XX", "kaz": "kk_KZ", "kor": "ko_KR", + "lit": "lt_LT", "lav": "lv_LV", "mya": "my_MM", "nep": "ne_NP", "nld": "nl_XX", "ron": "ro_RO", "rus": "ru_RU", + "sin": "si_LK", "tur": "tr_TR", "vie": "vi_VN", "zho": "zh_CN", "afr": "af_ZA", "aze": "az_AZ", "ben": "bn_IN", + "fas": "fa_IR", "heb": "he_IL", "hrv": "hr_HR", "ind": "id_ID", "kat": "ka_GE", "khm": "km_KH", "mkd": "mk_MK", + "mal": "ml_IN", "mon": "mn_MN", "mar": "mr_IN", "pol": "pl_PL", "pus": "ps_AF", "por": "pt_XX", "swe": "sv_SE", + "swa": "sw_KE", "tam": "ta_IN", "tel": "te_IN", "tha": "th_TH", "tgl": "tl_XX", "ukr": "uk_UA", "urd": "ur_PK", + "xho": "xh_ZA", "glg": "gl_ES", "slv": "sl_SI" + } def __init__(self, src_language: str, @@ -69,7 +86,10 @@ def __init__(self, if recipe not in [r.value for r in TranslationRecipe]: raise NotImplementedError(f"Unknown recipe: {recipe}") self.__recipe = recipe + self.__src_language = src_language self.__tgt_language = tgt_language + self.__tokenizer: PreTrainedTokenizer = None + self.__lang_model: PreTrainedModel = None self.__initialise_model(src_language, tgt_language, recipe, flavour) @staticmethod @@ -102,28 +122,36 @@ def normalise_pair(src_language: str, tgt_language: str) 
-> List[str]: else: return [src_language, tgt_language] - def translate(self, subs: List[SubRipItem], video_file_path: Optional[str] = None) -> List[SubRipItem]: + def translate(self, + subs: List[SubRipItem], + video_file_path: Optional[str] = None, + language_pair: Optional[Tuple[str, str]] = None) -> List[SubRipItem]: """Translate a list of subtitle cues. Arguments: subs {list} -- A list of SubRipItems. - video_file_path {string} -- The input video file path (default: None).. + + Keyword Arguments: + video_file_path {string} -- The input video file path (default: None). + language_pair {Tuple[str, str]} -- Used for overriding the default language pair (default: None). Returns: {list} -- A list of new SubRipItems holding the translation results. """ if self.__recipe == TranslationRecipe.HELSINKI_NLP.value: + if language_pair is not None: + self.__LOGGER.debug(f"Language pair ignored: {language_pair}") translated_texts = [] - self.lang_model.eval() + self.__lang_model.eval() new_subs = deepcopy(subs) src_texts = [sub.text for sub in new_subs] num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE) self.__LOGGER.info("Translating %s subtitle cue(s)..." % len(src_texts)) for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches): - input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) - translated = self.lang_model.generate(**input_ids) - translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated]) + input_ids = self.__tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) + translated = self.__lang_model.generate(**input_ids) + translated_texts.extend([self.__tokenizer.decode(t, skip_special_tokens=True) for t in translated]) for index in range(len(new_subs)): new_subs[index].text = translated_texts[index] self.__LOGGER.info("Subtitle translated") @@ -135,7 +163,7 @@ def translate(self, subs: List[SubRipItem], video_file_path: Optional[str] = Non raise TranslationException(f'"{self.__tgt_language}" is not supported by {self.__recipe}') audio = whisper.load_audio(video_file_path) self.__LOGGER.debug("Start translating the audio...") - result = self.lang_model.transcribe(audio, task="translate", language=LANGUAGES[lang]) + result = self.__lang_model.transcribe(audio, task="translate", language=LANGUAGES[lang]) self.__LOGGER.info("Finished translating the audio") srt_str = "" for i, segment in enumerate(result["segments"], start=1): @@ -145,6 +173,26 @@ def translate(self, subs: List[SubRipItem], video_file_path: Optional[str] = Non "\n" subtitle = Subtitle.load_subrip_str(srt_str) return subtitle.subs + elif self.__recipe == TranslationRecipe.FACEBOOK_MBART.value: + src_lang, tgt_lang = language_pair if language_pair is not None else (self.__src_language, self.__tgt_language) + self.__tokenizer.src_lang = Translator.__MBART_LANGUAGE_CODE_MAPPER.get(src_lang, None) + lang_code = Translator.__MBART_LANGUAGE_CODE_MAPPER.get(tgt_lang, None) + if src_lang is None or tgt_lang is None: + raise NotImplementedError(f"Language pair of {src_lang} and {src_lang} is not supported") + translated_texts = [] + self.__lang_model.eval() + new_subs = deepcopy(subs) + src_texts = [sub.text for sub in new_subs] + num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE) + self.__LOGGER.info("Translating %s subtitle cue(s)..." 
% len(src_texts)) + for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches): + input_ids = self.__tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True) + translated = self.__lang_model.generate(**input_ids, forced_bos_token_id=self.__tokenizer.lang_code_to_id[lang_code]) + translated_texts.extend([self.__tokenizer.decode(t, skip_special_tokens=True) for t in translated]) + for index in range(len(new_subs)): + new_subs[index].text = translated_texts[index] + self.__LOGGER.info("Subtitle translated") + return new_subs else: return [] @@ -170,6 +218,11 @@ def __initialise_model(self, src_lang: str, tgt_lang: str, recipe: str, flavour: self.__download_whisper_model("medium") # works for translation target other than English else: raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}") + elif recipe == TranslationRecipe.FACEBOOK_MBART.value: + if flavour in [f.value for f in FacebookMbartFlavour]: + self.__download_mbart_model(flavour) + else: + raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}") def __download_mt_model(self, src_lang: str, tgt_lang: str, flavour: str) -> bool: try: @@ -199,12 +252,19 @@ def __download_mt_model(self, src_lang: str, tgt_lang: str, flavour: str) -> boo return False def __download_whisper_model(self, flavour: str) -> None: - self.lang_model = whisper.load_model(flavour) + self.__lang_model = whisper.load_model(flavour) + + def __download_mbart_model(self, flavour: str) -> None: + mbart_model_name = f"facebook/mbart-{flavour}-50-many-to-many-mmt" + self.__LOGGER.debug("Trying to download the mBART model %s" % mbart_model_name) + self.__tokenizer = MBart50TokenizerFast.from_pretrained(mbart_model_name) + self.__lang_model = MBartForConditionalGeneration.from_pretrained(mbart_model_name) + self.__LOGGER.debug("mBART model %s downloaded" % mbart_model_name) def __download_by_mt_name(self, mt_model_name: str) -> None: self.__LOGGER.debug("Trying to download the MT model %s" % mt_model_name) - self.tokenizer = MarianTokenizer.from_pretrained(mt_model_name) - self.lang_model = MarianMTModel.from_pretrained(mt_model_name) + self.__tokenizer = MarianTokenizer.from_pretrained(mt_model_name) + self.__lang_model = MarianMTModel.from_pretrained(mt_model_name) self.__LOGGER.debug("MT model %s downloaded" % mt_model_name) def __log_and_back_off(self, mt_model_name: str): diff --git a/tests/subaligner/test_translator.py b/tests/subaligner/test_translator.py index a116f2f..897b935 100644 --- a/tests/subaligner/test_translator.py +++ b/tests/subaligner/test_translator.py @@ -3,6 +3,7 @@ from mock import Mock, patch from parameterized import parameterized from subaligner.subtitle import Subtitle +from subaligner.llm import TranslationRecipe, HelsinkiNLPFlavour, FacebookMbartFlavour from subaligner.translator import Translator as Undertest @@ -13,19 +14,38 @@ def setUp(self): os.path.dirname(os.path.abspath(__file__)), "resource/test.srt" ) - @patch("transformers.MarianMTModel.from_pretrained") @patch("transformers.MarianTokenizer.from_pretrained") - def test_translate(self, tokenizer_from_pretrained, model_from_pretrained): + @patch("transformers.MarianMTModel.from_pretrained") + def test_translate_hel_nlp(self, model_from_pretrained, tokenizer_from_pretrained): + subs = Subtitle.load(self.srt_file_path).subs + mock_tokenizer = Mock() + mock_tokenizer.return_value = {"input_ids": None, "attention_mask": None} + mock_tokenizer.decode.return_value = "translated" + mock_model = Mock() + 
mock_model.generate.return_value = [None] * len(subs) + tokenizer_from_pretrained.return_value = mock_tokenizer + model_from_pretrained.return_value = mock_model + + undertest = Undertest("eng", "zho", recipe=TranslationRecipe.HELSINKI_NLP.value) + translated_subs = undertest.translate(subs) + + self.assertEqual(["translated"] * len(subs), [*map(lambda x: x.text, translated_subs)]) + + @patch("transformers.MBart50TokenizerFast.from_pretrained") + @patch("transformers.MBartForConditionalGeneration.from_pretrained") + def test_translate_fb_mbart(self, model_from_pretrained, tokenizer_from_pretrained): subs = Subtitle.load(self.srt_file_path).subs mock_tokenizer = Mock() mock_tokenizer.return_value = {"input_ids": None, "attention_mask": None} mock_tokenizer.decode.return_value = "translated" + mock_tokenizer.lang_code_to_id = {"zh_CN": 250025} mock_model = Mock() mock_model.generate.return_value = [None] * len(subs) tokenizer_from_pretrained.return_value = mock_tokenizer model_from_pretrained.return_value = mock_model - translated_subs = Undertest("eng", "zho").translate(subs) + undertest = Undertest("eng", "zho", recipe=TranslationRecipe.FACEBOOK_MBART.value, flavour=FacebookMbartFlavour.LARGE.value) + translated_subs = undertest.translate(subs) self.assertEqual(["translated"] * len(subs), [*map(lambda x: x.text, translated_subs)]) From 1228a9c39de9e103fc244cdfffa400cca4bc28dd Mon Sep 17 00:00:00 2001 From: baxtree Date: Wed, 29 Mar 2023 09:25:56 +0100 Subject: [PATCH 12/20] update dependencies --- Pipfile | 4 +- requirements.txt | 3 +- subaligner/translator.py | 64 ++++------------------------- tests/subaligner/test_translator.py | 47 +++++++-------------- 4 files changed, 27 insertions(+), 91 deletions(-) diff --git a/Pipfile b/Pipfile index 791b29b..f84d858 100644 --- a/Pipfile +++ b/Pipfile @@ -34,7 +34,7 @@ click = "==5.1" cloudpickle = "==0.5.3" cycler = "==0.10.0" Cython = "~=0.29.22" -dask = "<2022.1.0" +dask = ">=2021.10.0,<2022.1.0" decorator = "==4.3.0" distributed = "==1.13.0" filelock = "<4.0.0" @@ -48,7 +48,7 @@ html5lib = "==1.0b9" hyperopt = "==0.2.4" idna = "==2.8" isort = "==4.3.4" -joblib = "==0.11" +joblib = ">=1.2.0" Keras-Applications = ">=1.0.8" Keras-Preprocessing = ">=1.0.9" kiwisolver = "==1.0.1" diff --git a/requirements.txt b/requirements.txt index 92c23bb..1818ca4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ click==5.1 cloudpickle~=1.6.0 cycler==0.10.0 Cython~=0.29.22 -dask<2022.1.0 +dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 filelock<4.0.0 @@ -23,6 +23,7 @@ html5lib==1.0b9 hyperopt==0.2.4 idna==2.8 isort==4.3.4 +joblib>=1.2.0 Keras-Applications>=1.0.8 Keras-Preprocessing>=1.0.9 kiwisolver==1.0.1 diff --git a/subaligner/translator.py b/subaligner/translator.py index d4b66bc..2cd977d 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -28,29 +28,12 @@ class Translator(object): __TENSOR_TYPE = "pt" __TRANSLATING_BATCH_SIZE = 10 - __LANGUAGE_CODE_MAPPER = { - "bos": "zls", - "cmn": "zho", - "gla": "cel", - "grc": "grk", - "guj": "inc", - "ina": "art", - "jbo": "art", - "kan": "dra", - "kir": "trk", - "lat": "itc", - "lfn": "art", - "mya": "sit", - "nep": "inc", - "ori": "inc", - "sin": "inc", - "srp": "zls", - "tam": "dra", - "tat": "trk", - "tel": "dra", - "yue": "zho" + __HELSINKI_LANGUAGE_CODE_MAPPER = { + "bos": "zls", "cmn": "zho", "gla": "cel", "grc": "grk", "guj": "inc", "ina": "art", "jbo": "art", "kan": "dra", + "kir": "trk", "lat": "itc", "lfn": "art", "mya": "sit", "nep": "inc", "ori": 
"inc", "sin": "inc", "srp": "zls", + "tam": "dra", "tat": "trk", "tel": "dra", "yue": "zho" } - __LANGUAGE_PAIR_MAPPER = { + __HELSINKI_LANGUAGE_PAIR_MAPPER = { "eng-jpn": "eng-jap", "jpn-eng": "jap-eng" } @@ -92,36 +75,6 @@ def __init__(self, self.__lang_model: PreTrainedModel = None self.__initialise_model(src_language, tgt_language, recipe, flavour) - @staticmethod - def normalise_single(language_code: str) -> str: - """Normalise a single language code. - - Arguments: - language_code {string} -- A language code derived from ISO 639-3. - - Returns: - string -- The language code understood by the language model. - """ - - return Translator.__LANGUAGE_CODE_MAPPER[language_code] if language_code in Translator.__LANGUAGE_CODE_MAPPER else language_code - - @staticmethod - def normalise_pair(src_language: str, tgt_language: str) -> List[str]: - """Normalise a pair of language codes. - - Arguments: - src_language {string} -- The source language code derived from ISO 639-3. - tgt_language {string} -- The target language code derived from ISO 639-3. - - Returns: - list -- The language code pair understood by the language model. - """ - - if "{}-{}".format(src_language, tgt_language) in Translator.__LANGUAGE_PAIR_MAPPER: - return Translator.__LANGUAGE_PAIR_MAPPER["{}-{}".format(src_language, tgt_language)].split("-") - else: - return [src_language, tgt_language] - def translate(self, subs: List[SubRipItem], video_file_path: Optional[str] = None, @@ -198,9 +151,10 @@ def translate(self, def __initialise_model(self, src_lang: str, tgt_lang: str, recipe: str, flavour: Optional[str]) -> None: if recipe == TranslationRecipe.HELSINKI_NLP.value: - src_lang = Translator.normalise_single(src_lang) - tgt_lang = Translator.normalise_single(tgt_lang) - src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang) + src_lang = Translator.__HELSINKI_LANGUAGE_CODE_MAPPER.get(src_lang, src_lang) + tgt_lang = Translator.__HELSINKI_LANGUAGE_CODE_MAPPER.get(tgt_lang, tgt_lang) + lang_pair = "{}-{}".format(src_lang, tgt_lang) + src_lang, tgt_lang = Translator.__HELSINKI_LANGUAGE_PAIR_MAPPER.get(lang_pair, lang_pair).split("-") if self.__download_mt_model(src_lang, tgt_lang, HelsinkiNLPFlavour.OPUS_MT.value): return diff --git a/tests/subaligner/test_translator.py b/tests/subaligner/test_translator.py index 897b935..6aa810c 100644 --- a/tests/subaligner/test_translator.py +++ b/tests/subaligner/test_translator.py @@ -3,7 +3,7 @@ from mock import Mock, patch from parameterized import parameterized from subaligner.subtitle import Subtitle -from subaligner.llm import TranslationRecipe, HelsinkiNLPFlavour, FacebookMbartFlavour +from subaligner.llm import TranslationRecipe, HelsinkiNLPFlavour, WhisperFlavour, FacebookMbartFlavour from subaligner.translator import Translator as Undertest @@ -31,6 +31,19 @@ def test_translate_hel_nlp(self, model_from_pretrained, tokenizer_from_pretraine self.assertEqual(["translated"] * len(subs), [*map(lambda x: x.text, translated_subs)]) + @patch("whisper.load_audio") + @patch("whisper.load_model") + def test_translate_whisper(self, load_model, load_audio): + subs = Subtitle.load(self.srt_file_path).subs + model = Mock() + load_model.return_value = model + model.transcribe.return_value = {"segments": [{"start": 0, "end": 1, "text": "translated"}]} + + undertest = Undertest("eng", "zho", recipe=TranslationRecipe.WHISPER.value, flavour=WhisperFlavour.TINY.value) + translated_subs = undertest.translate(subs, "video_path") + + self.assertEqual(["translated"], [*map(lambda x: x.text, 
translated_subs)]) + @patch("transformers.MBart50TokenizerFast.from_pretrained") @patch("transformers.MBartForConditionalGeneration.from_pretrained") def test_translate_fb_mbart(self, model_from_pretrained, tokenizer_from_pretrained): @@ -49,38 +62,6 @@ def test_translate_fb_mbart(self, model_from_pretrained, tokenizer_from_pretrain self.assertEqual(["translated"] * len(subs), [*map(lambda x: x.text, translated_subs)]) - @parameterized.expand([ - ["bos", "zls"], - ["cmn", "zho"], - ["gla", "cel"], - ["grc", "grk"], - ["guj", "inc"], - ["ina", "art"], - ["jbo", "art"], - ["kan", "dra"], - ["kir", "trk"], - ["lat", "itc"], - ["lfn", "art"], - ["mya", "sit"], - ["nep", "inc"], - ["ori", "inc"], - ["sin", "inc"], - ["srp", "zls"], - ["tam", "dra"], - ["tat", "trk"], - ["tel", "dra"], - ["yue", "zho"], - ]) - def test_normalise_single(self, original, normalised): - self.assertEqual(normalised, Undertest.normalise_single(original)) - - @parameterized.expand([ - ["eng-jpn", "eng-jap"], - ["jpn-eng", "jap-eng"], - ]) - def test_normalise_pair(self, original, normalised): - self.assertEqual(normalised, "-".join(Undertest.normalise_pair(*original.split("-")))) - @patch("transformers.MarianTokenizer.from_pretrained", side_effect=OSError) def test_throw_exception_on_translating_subs(self, mock_tokenizer_from_pretrained): subs = Subtitle.load(self.srt_file_path).subs From 2731fdfddbae6d2498d6b52986e154be8b745440 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 14 Apr 2023 18:06:05 +0100 Subject: [PATCH 13/20] support transcription during batch processing --- Makefile | 2 +- README.md | 10 +- site/source/usage.rst | 8 +- subaligner/subaligner_batch/__main__.py | 148 +++++++++++++++++------- 4 files changed, 125 insertions(+), 43 deletions(-) diff --git a/Makefile b/Makefile index c756287..ff2eedf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ ifdef PYTHON PYTHON := $(PYTHON) else -PYTHON := 3.7.7 +PYTHON := 3.8.2 endif ifdef PLATFORM diff --git a/README.md b/README.md index 7229920..71d9207 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,6 @@ [![Documentation Status](https://readthedocs.org/projects/subaligner/badge/?version=latest)](https://subaligner.readthedocs.io/en/latest/?badge=latest) [![GitHub license](https://img.shields.io/github/license/baxtree/subaligner)](https://github.com/baxtree/subaligner/blob/master/LICENSE) [![PyPI](https://badge.fury.io/py/subaligner.svg)](https://badge.fury.io/py/subaligner) -[![Docker Build](https://img.shields.io/docker/cloud/build/baxtree/subaligner?label=Docker&style=flat)](https://hub.docker.com/r/baxtree/subaligner/builds) [![Docker Pulls](https://img.shields.io/docker/pulls/baxtree/subaligner)](https://hub.docker.com/r/baxtree/subaligner) [![Citation](https://zenodo.org/badge/228440472.svg)](https://doi.org/10.5281/zenodo.5603083) @@ -126,7 +125,14 @@ $ subaligner --languages $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt -$ subaligner -m transcribe -v video.mp4 -ml src -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt +$ subaligner -m dual -v video.mp4 -tr helsinki-nlp -o subtitle_aligned.srt -t src,tgt +$ subaligner -m dual -v video.mp4 -tr facebook-mbart -tf large -o subtitle_aligned.srt -t src,tgt +$ subaligner -m dual -v video.mp4 -tr whisper -tf small -o subtitle_aligned.srt -t src,eng +``` +``` +# Transcribe audiovisual files and generate translated subtitles + +$ subaligner 
-m transcribe -v video.mp4 -ml src -mr whisper -mf small -tr helsinki-nlp -o subtitle_aligned.srt -t src,tgt ``` ``` # Shift subtitle manually by offset in seconds diff --git a/site/source/usage.rst b/site/source/usage.rst index b67ab88..67fc637 100644 --- a/site/source/usage.rst +++ b/site/source/usage.rst @@ -49,7 +49,13 @@ Make sure you have got the virtual environment activated upfront. (.venv) $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt (.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt - (.venv) $ subaligner -m transcribe -v video.mp4 -ml src -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt + (.venv) $ subaligner -m dual -v video.mp4 -tr helsinki-nlp -o subtitle_aligned.srt -t src,tgt + (.venv) $ subaligner -m dual -v video.mp4 -tr facebook-mbart -tf large -o subtitle_aligned.srt -t src,tgt + (.venv) $ subaligner -m dual -v video.mp4 -tr whisper -tf small -o subtitle_aligned.srt -t src,eng + +**Transcribe audiovisual files and generate translated subtitles**:: + + (.venv) $ subaligner -m transcribe -v video.mp4 -ml src -mr whisper -mf small -tr helsinki-nlp -o subtitle_aligned.srt -t src,tgt **Shift subtitle manually by offset in seconds**:: diff --git a/subaligner/subaligner_batch/__main__.py b/subaligner/subaligner_batch/__main__.py index 002688e..e79fb56 100755 --- a/subaligner/subaligner_batch/__main__.py +++ b/subaligner/subaligner_batch/__main__.py @@ -1,8 +1,10 @@ #!/usr/bin/env python """ -usage: subaligner_batch [-h] [-m {single,dual}] [-vd VIDEO_DIRECTORY] [-sd SUBTITLE_DIRECTORY] [-l MAX_LOGLOSS] [-so] +usage: subaligner_batch [-h] [-m {single,dual,script,transcribe}] [-sd SUBTITLE_DIRECTORY] [-vd VIDEO_DIRECTORY] [-l MAX_LOGLOSS] [-so] [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-od OUTPUT_DIRECTORY] [-t TRANSLATE] [-lgs] [-d] [-q] [-ver] + [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-od OUTPUT_DIRECTORY] [-of {srt,ytt,ttml,txt,smi,xml,ssa,ass,dfxp,sub,scc,tmp,sami,vtt,stl,sbv}] [-t TRANSLATE] + [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver] Batch align multiple subtitle files and audiovisual files @@ -11,13 +13,13 @@ optional arguments: -h, --help show this help message and exit - -vd VIDEO_DIRECTORY, --video_directory VIDEO_DIRECTORY - Path to the video directory -sd SUBTITLE_DIRECTORY, --subtitle_directory SUBTITLE_DIRECTORY Path to the subtitle directory + -vd VIDEO_DIRECTORY, --video_directory VIDEO_DIRECTORY + Path to the video directory -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS Max global log loss for alignment - -so, --stretch_on Switch on stretch on subtitles + -so, --stretch_on Switch on stretch on subtitles) -sil 
{afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --stretch_in_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho} Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]. NB: This will be ignored if neither -so nor --stretch_on is present @@ -26,15 +28,23 @@ Path to the output directory containing training results -od OUTPUT_DIRECTORY, --output_directory OUTPUT_DIRECTORY Path to the output subtitle directory + -of {srt,ytt,ttml,txt,smi,xml,ssa,ass,dfxp,sub,scc,tmp,sami,vtt,stl,sbv}, --output_format {srt,ytt,ttml,txt,smi,xml,ssa,ass,dfxp,sub,scc,tmp,sami,vtt,stl,sbv} + File format of the output subtitles -t TRANSLATE, --translate TRANSLATE Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho) + -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho} + Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes] + -mr {whisper}, --transcription_recipe {whisper} + LLM recipe used for transcribing video files + -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large} + Flavour variation for a specific LLM recipe supporting transcription -lgs, --languages Print out language codes used for stretch and translation -d, --debug Print out debugging information -q, --quiet Switch off logging information -ver, --version show program's version number and exit required arguments: - -m {single,dual}, --mode {single,dual} + -m {single,dual,script,transcribe}, --mode {single,dual,script,transcribe} Alignment mode: either single or dual """ @@ -43,6 +53,7 @@ import traceback import os import pkg_resources +import tempfile def main(): @@ -65,22 +76,22 @@ def main(): "--mode", type=str, default="", - choices=["single", "dual"], + choices=["single", "dual", "script", "transcribe"], help="Alignment mode: either single or dual", ) parser.add_argument( - "-vd", - "--video_directory", + "-sd", + "--subtitle_directory", type=str, default="", - help="Path to the video directory", + help="Path to the subtitle directory", ) parser.add_argument( - "-sd", - "--subtitle_directory", + "-vd", + 
"--video_directory", type=str, default="", - help="Path to the subtitle directory", + help="Path to the video directory", ) parser.add_argument( "-l", @@ -139,6 +150,31 @@ def main(): type=str, help="Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)", ) + parser.add_argument( + "-ml", + "--main_language", + type=str.lower, + choices=Utils.get_stretch_language_codes(), + help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]", + ) + from subaligner.llm import TranscriptionRecipe + from subaligner.llm import WhisperFlavour + parser.add_argument( + "-mr", + "--transcription_recipe", + type=str.lower, + default=TranscriptionRecipe.WHISPER.value, + choices=[r.value for r in TranscriptionRecipe], + help="LLM recipe used for transcribing video files" + ) + parser.add_argument( + "-mf", + "--transcription_flavour", + type=str.lower, + default=WhisperFlavour.SMALL.value, + choices=[wf.value for wf in WhisperFlavour], + help="Flavour variation for a specific LLM recipe supporting transcription" + ) parser.add_argument("-lgs", "--languages", action="store_true", help="Print out language codes used for stretch and translation") parser.add_argument("-d", "--debug", action="store_true", @@ -159,7 +195,7 @@ def main(): print("ERROR: --video_directory was not passed in") parser.print_usage() sys.exit(21) - if FLAGS.subtitle_directory == "": + if FLAGS.mode != "transcribe" and FLAGS.subtitle_directory == "": print("ERROR: --subtitle_directory was not passed in") parser.print_usage() sys.exit(21) @@ -167,28 +203,40 @@ def main(): print("ERROR: --output_directory was not passed in") parser.print_usage() sys.exit(21) - if os.path.abspath(FLAGS.subtitle_directory) == os.path.abspath(FLAGS.output_directory): + if FLAGS.mode != "transcribe" and os.path.abspath(FLAGS.subtitle_directory) == os.path.abspath(FLAGS.output_directory): print("ERROR: The output directory cannot be set to the same as the input subtitle directory") parser.print_usage() sys.exit(21) - if FLAGS.translate is not None: + if FLAGS.translate is not None or FLAGS.mode == "transcribe": if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}: - print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.') + print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.') + sys.exit(21) + if FLAGS.stretch_on or FLAGS.mode == "script": + if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}: + print('ERROR: Alignment has been configured to use extra features. 
Please install "subaligner[stretch]" and run your command again.') + sys.exit(21) + if FLAGS.mode == "transcribe": + if FLAGS.main_language is None: + print("ERROR: --main_language was not passed in but required by mode 'transcribe'") + parser.print_usage() sys.exit(21) video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in os.walk(FLAGS.video_directory) for p in files if not p.startswith(".")] - subtitle_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in - os.walk(FLAGS.subtitle_directory) for p in files if not p.startswith(".")] - if len(video_file_paths) != len(subtitle_file_paths): - print("ERROR: The numbers of input videos and subtitles do not match") - parser.print_usage() - sys.exit(21) + + if FLAGS.mode != "transcribe": + subtitle_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in + os.walk(FLAGS.subtitle_directory) for p in files if not p.startswith(".")] + if len(video_file_paths) != len(subtitle_file_paths): + print("ERROR: The numbers of input videos and subtitles do not match") + parser.print_usage() + sys.exit(21) output_dir = os.path.abspath(FLAGS.output_directory) os.makedirs(output_dir, exist_ok=True) video_file_paths = sorted(video_file_paths, key=lambda x: os.path.splitext(os.path.basename(x))[0]) - subtitle_file_paths = sorted(subtitle_file_paths, key=lambda x: os.path.splitext(os.path.basename(x))[0]) + if FLAGS.mode != "transcribe": + subtitle_file_paths = sorted(subtitle_file_paths, key=lambda x: os.path.splitext(os.path.basename(x))[0]) exit_segfail = FLAGS.exit_segfail stretch = FLAGS.stretch_on stretch_in_lang = FLAGS.stretch_in_language @@ -205,15 +253,16 @@ def main(): failures = [] for index in range(len(video_file_paths)): local_video_path = video_file_paths[index] - local_subtitle_path = subtitle_file_paths[index] + local_subtitle_path = subtitle_file_paths[index] if FLAGS.mode != "transcribe" else "{}.srt".format(tempfile.mkstemp()[1]) try: + voice_probabilities = None if FLAGS.mode == "single": aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass( video_file_path=local_video_path, subtitle_file_path=local_subtitle_path, weights_dir=os.path.join(FLAGS.training_output_directory, "models", "training", "weights") ) - else: + elif FLAGS.mode == "dual": aligned_subs, subs, voice_probabilities, frame_rate = predictor.predict_dual_pass( video_file_path=local_video_path, subtitle_file_path=local_subtitle_path, @@ -222,12 +271,30 @@ def main(): stretch_in_lang=stretch_in_lang, exit_segfail=exit_segfail, ) + elif FLAGS.mode == "script": + aligned_subs, _, voice_probabilities, frame_rate = predictor.predict_plain_text( + video_file_path=local_video_path, + subtitle_file_path=local_subtitle_path, + stretch_in_lang=stretch_in_lang, + ) + elif FLAGS.mode == "transcribe": + from subaligner.transcriber import Transcriber + transcriber = Transcriber(recipe=FLAGS.transcription_recipe, flavour=FLAGS.transcription_flavour) + subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang) + aligned_subs = subtitle.subs - parent_dir = os.path.dirname(local_subtitle_path.replace(os.path.abspath(FLAGS.subtitle_directory), output_dir)) - os.makedirs(parent_dir, exist_ok=True) - file_parts = os.path.basename(local_subtitle_path).rsplit(".", 1) - file_parts[1] = FLAGS.output_format if FLAGS.output_format != "" else file_parts[1] - aligned_subtitle_path = os.path.abspath(os.path.join(parent_dir, ".".join(file_parts).replace(".stl", ".srt"))) + if 
FLAGS.mode == "transcribe": + parent_dir = os.path.dirname(video_file_paths[index].replace(os.path.abspath(FLAGS.video_directory), output_dir)) + os.makedirs(parent_dir, exist_ok=True) + file_parts = os.path.basename(video_file_paths[index]).rsplit(".", 1) + file_parts[1] = FLAGS.output_format if FLAGS.output_format != "" else "srt" + aligned_subtitle_path = os.path.abspath(os.path.join(parent_dir, ".".join(file_parts).replace(".stl", ".srt"))) + else: + parent_dir = os.path.dirname(local_subtitle_path.replace(os.path.abspath(FLAGS.subtitle_directory), output_dir)) + os.makedirs(parent_dir, exist_ok=True) + file_parts = os.path.basename(local_subtitle_path).rsplit(".", 1) + file_parts[1] = FLAGS.output_format if FLAGS.output_format != "" else file_parts[1] + aligned_subtitle_path = os.path.abspath(os.path.join(parent_dir, ".".join(file_parts).replace(".stl", ".srt"))) if FLAGS.translate is not None: from subaligner.translator import Translator @@ -235,16 +302,19 @@ def main(): translator = Translator(source, target) aligned_subs = translator.translate(aligned_subs) Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate, "utf-8") - else: + elif FLAGS.mode == "transcribe": Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate, "utf-8") + else: + Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path, frame_rate) - log_loss = predictor.get_log_loss(voice_probabilities, aligned_subs) - if log_loss is None or log_loss > FLAGS.max_logloss: - print( - "ERROR: Alignment failed with a too high loss value: {} for {} and {}".format(log_loss, local_video_path, local_subtitle_path) - ) - failures.append((local_video_path, local_subtitle_path)) - continue + if voice_probabilities is not None: + log_loss = predictor.get_log_loss(voice_probabilities, aligned_subs) + if log_loss is None or log_loss > FLAGS.max_logloss: + print( + "ERROR: Alignment failed with a too high loss value: {} for {} and {}".format(log_loss, local_video_path, local_subtitle_path) + ) + failures.append((local_video_path, local_subtitle_path)) + continue print("Aligned subtitle saved to: {}".format(aligned_subtitle_path)) except UnsupportedFormatException as e: From 5c71ccf9598a1d860387b586cdac1c60057f7f55 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 9 Jun 2023 09:52:19 +0100 Subject: [PATCH 14/20] reconcile dependencies and update project metadata --- Pipfile | 2 +- requirements-dev.txt | 4 ++-- requirements-llm.txt | 1 - requirements.txt | 6 +++--- setup.py | 8 ++++---- subaligner/_version.py | 2 +- subaligner/trainer.py | 6 +++--- subaligner/transcriber.py | 2 +- subaligner/translator.py | 10 +++++----- tests/subaligner/test_translator.py | 17 ++++++++++++++++- 10 files changed, 36 insertions(+), 22 deletions(-) diff --git a/Pipfile b/Pipfile index f84d858..5e4dffc 100644 --- a/Pipfile +++ b/Pipfile @@ -68,6 +68,7 @@ psutil = "==5.6.7" py = "==1.10.0" pyasn1 = "==0.4.8" pyasn1-modules = "==0.2.7" +pycountry = "~=20.7.3" pydot = "==1.2.4" pydot-ng = "==1.0.0" pydotplus = "==2.0.2" @@ -100,7 +101,6 @@ typing-extensions = "~=3.7.0" urllib3 = "~=1.26.5" Werkzeug = ">=0.15.3" zict = "==0.1.3" -zipp = "==0.6.0" aeneas = "==1.7.3.0" [requires] diff --git a/requirements-dev.txt b/requirements-dev.txt index d6b5f10..4007188 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,13 +2,13 @@ mock==4.0.3 coverage==5.5 tox~=3.23.0 pycodestyle==2.5.0 -twine>=3.1.1 +twine<4.0.0 
snakeviz==2.1.0 line-profiler==3.1.0 scikit-build==0.11.1 radish-bdd~=0.13.3 pex<=2.1.80 -mypy==0.931 +mypy==1.3.0 types-requests==2.27.9 types-setuptools==57.4.9 typing-extensions<4.0.0 diff --git a/requirements-llm.txt b/requirements-llm.txt index fbe39c8..ec609ca 100644 --- a/requirements-llm.txt +++ b/requirements-llm.txt @@ -1,4 +1,3 @@ -pycountry~=20.7.3 sentencepiece~=0.1.95 torch<1.13.0 transformers<4.27.0 diff --git a/requirements.txt b/requirements.txt index 1818ca4..489686f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,6 +43,7 @@ psutil==5.6.7 py==1.10.0 pyasn1==0.4.8 pyasn1-modules==0.2.7 +pycountry~=20.7.3 pydot==1.2.4 pydot-ng==1.0.0 pydotplus==2.0.2 @@ -52,7 +53,7 @@ pysubs2<=1.4.2 pystack-debugger==0.8.0 pytz==2018.4 PyYAML>=4.2b1 -requests~=2.25.1 +requests<3.0.0 requests-oauthlib==1.3.0 rsa==4.7 scipy<=1.8.1 @@ -60,7 +61,7 @@ scikit-learn<1.2.0 setuptools>=41.0.0 six~=1.15.0 tblib==1.3.2 -tensorflow>=1.15.5,<2.12 +tensorflow>=1.15.5,<2.13 termcolor==1.1.0 toml==0.10.0 toolz==0.9.0 @@ -68,4 +69,3 @@ tornado==5.1.0 urllib3~=1.26.5 Werkzeug>=0.15.3 zict==0.1.3 -zipp==0.6.0 diff --git a/setup.py b/setup.py index 909ee15..1220641 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "dev": dev_requirements + stretch_requirements + llm_requirements + docs_requirements, "docs": docs_requirements, "stretch": stretch_requirements, - "translation": llm_requirements, # for backward compatibility and will be deprecated with "llm" + "translation": llm_requirements, # for backward compatibility and now deprecated with "llm" "llm": llm_requirements, } @@ -41,17 +41,17 @@ author_email="xi.bai.ed@gmail.com", classifiers=[ "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", ], license="MIT", url="https://subaligner.readthedocs.io/en/latest/", - description="Automatically synchronize and translate subtitles with pretrained deep neural networks, forced alignments and transformers.", + description="Automatically synchronize and translate subtitles, or create new ones by transcribing, using pre-trained DNNs, Forced Alignments and Transformers.", long_description=readme + "\n\n", long_description_content_type='text/markdown', - python_requires=">=3.6", + python_requires=">=3.8", package_dir={"subaligner": "subaligner"}, packages=[ "subaligner", diff --git a/subaligner/_version.py b/subaligner/_version.py index 7945cf2..eba52d9 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.3.1" +__version__ = "0.3.2" diff --git a/subaligner/trainer.py b/subaligner/trainer.py index eb0c25e..815439b 100644 --- a/subaligner/trainer.py +++ b/subaligner/trainer.py @@ -8,7 +8,7 @@ import numpy as np import multiprocessing as mp -from typing import List, Tuple, Optional +from typing import List, Tuple, Optional, Union from .network import Network from .media_helper import MediaHelper from .hyperparameters import Hyperparameters @@ -330,8 +330,8 @@ def __extract_in_multithreads( index: int, av_file_path: str, subtitle_file_path: str, - train_data: np.ndarray, - labels: np.ndarray, + train_data: Union[np.ndarray, List], + labels: Union[np.ndarray, List], sound_effect_start_marker: Optional[str], sound_effect_end_marker: Optional[str] ) -> Tuple[str, str]: diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py 
index 0a0481a..3122ff4 100644
--- a/subaligner/transcriber.py
+++ b/subaligner/transcriber.py
@@ -55,7 +55,7 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle
         try:
             audio = whisper.load_audio(audio_file_path)
             self.__LOGGER.debug("Start transcribing the audio...")
-            result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang])
+            result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang], logprob_threshold=-1.2, no_speech_threshold=0.16)
             self.__LOGGER.info("Finished transcribing the audio")
             srt_str = ""
             for i, segment in enumerate(result["segments"], start=1):
diff --git a/subaligner/translator.py b/subaligner/translator.py
index 2cd977d..89462c4 100644
--- a/subaligner/translator.py
+++ b/subaligner/translator.py
@@ -112,11 +112,11 @@ def translate(self,
         elif self.__recipe == TranslationRecipe.WHISPER.value:
             assert video_file_path is not None
             lang = Utils.get_iso_639_alpha_2(self.__tgt_language)
-            if lang not in LANGUAGES:
-                raise TranslationException(f'"{self.__tgt_language}" is not supported by {self.__recipe}')
+            if lang not in LANGUAGES or lang != "en":
+                raise TranslationException(f'"{self.__tgt_language}" is not supported as a translation target by {self.__recipe}')
             audio = whisper.load_audio(video_file_path)
             self.__LOGGER.debug("Start translating the audio...")
-            result = self.__lang_model.transcribe(audio, task="translate", language=LANGUAGES[lang])
+            result = self.__lang_model.transcribe(audio, task="translate")
             self.__LOGGER.info("Finished translating the audio")
             srt_str = ""
             for i, segment in enumerate(result["segments"], start=1):
@@ -131,7 +131,7 @@ def translate(self,
             self.__tokenizer.src_lang = Translator.__MBART_LANGUAGE_CODE_MAPPER.get(src_lang, None)
             lang_code = Translator.__MBART_LANGUAGE_CODE_MAPPER.get(tgt_lang, None)
             if src_lang is None or tgt_lang is None:
-                raise NotImplementedError(f"Language pair of {src_lang} and {src_lang} is not supported")
+                raise NotImplementedError(f"Language pair of {src_lang} and {tgt_lang} is not supported by {self.__recipe}")
             translated_texts = []
             self.__lang_model.eval()
             new_subs = deepcopy(subs)
@@ -163,7 +163,7 @@ def __initialise_model(self, src_lang: str, tgt_lang: str, recipe: str, flavour:
             elif self.__download_mt_model(src_lang, tgt_lang, HelsinkiNLPFlavour.OPUS_MT_TC_BIG.value):
                 return
             else:
-                message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang)
+                message = f'Cannot find the {recipe} MT model for source language "{src_lang}" and destination language "{tgt_lang}"'
                 self.__LOGGER.error(message)
                 raise NotImplementedError(message)
         elif recipe == TranslationRecipe.WHISPER.value:
diff --git a/tests/subaligner/test_translator.py b/tests/subaligner/test_translator.py
index 6aa810c..540323d 100644
--- a/tests/subaligner/test_translator.py
+++ b/tests/subaligner/test_translator.py
@@ -4,6 +4,7 @@
 from parameterized import parameterized
 from subaligner.subtitle import Subtitle
 from subaligner.llm import TranslationRecipe, HelsinkiNLPFlavour, WhisperFlavour, FacebookMbartFlavour
+from subaligner.exception import TranslationException
 from subaligner.translator import Translator as Undertest
 
 
@@ -39,7 +40,7 @@ def test_translate_whisper(self, load_model, load_audio):
         load_model.return_value = model
         model.transcribe.return_value = {"segments": [{"start": 0, "end": 1, "text": "translated"}]}
 
-        undertest = Undertest("eng", "zho", recipe=TranslationRecipe.WHISPER.value, 
flavour=WhisperFlavour.TINY.value) + undertest = Undertest("eng", "eng", recipe=TranslationRecipe.WHISPER.value, flavour=WhisperFlavour.TINY.value) translated_subs = undertest.translate(subs, "video_path") self.assertEqual(["translated"], [*map(lambda x: x.text, translated_subs)]) @@ -72,3 +73,17 @@ def test_throw_exception_on_translating_subs(self, mock_tokenizer_from_pretraine self.assertTrue(isinstance(e, NotImplementedError)) else: self.fail("Should have thrown exception") + + @patch("whisper.load_model") + def test_throw_exception_on_unsupported_whisper_translation_target(self, load_model): + subs = Subtitle.load(self.srt_file_path).subs + model = Mock() + load_model.return_value = model + model.transcribe.return_value = {"segments": [{"start": 0, "end": 1, "text": "translated"}]} + + try: + Undertest("eng", "unk", recipe=TranslationRecipe.WHISPER.value, flavour=WhisperFlavour.TINY.value).translate(subs, "video_path") + except Exception as e: + self.assertTrue(isinstance(e, TranslationException)) + else: + self.fail("Should have thrown exception") From 5e4c3f31c8543f6f0e814e968b4858e7586c6b2a Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 9 Jun 2023 18:50:52 +0100 Subject: [PATCH 15/20] update docker base images --- .github/workflows/dockerhub.yml | 32 +++++++++---------- Makefile | 9 +++++- ...ockerfile-Debian10 => Dockerfile-Debian11} | 2 +- ...ockerfile-Fedora31 => Dockerfile-Fedora37} | 4 ++- docker/docker-compose.yml | 8 ++--- subaligner/_version.py | 2 +- subaligner/transcriber.py | 2 +- 7 files changed, 34 insertions(+), 25 deletions(-) rename docker/{Dockerfile-Debian10 => Dockerfile-Debian11} (94%) rename docker/{Dockerfile-Fedora31 => Dockerfile-Fedora37} (89%) diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index c4c6ccd..93792dd 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -60,30 +60,30 @@ jobs: tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.u20 push: true - - name: Build and push the Fedora 31 image - id: docker_build_fed31 - uses: docker/build-push-action@v2 - with: - context: ./docker - file: "./docker/Dockerfile-Fedora31" - build-args: | - "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" - allow: network.host - github-token: ${{ github.token }} - tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.fed31 - push: true +# - name: Build and push the Fedora 37 image +# id: docker_build_fed37 +# uses: docker/build-push-action@v2 +# with: +# context: ./docker +# file: "./docker/Dockerfile-Fedora37" +# build-args: | +# "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" +# allow: network.host +# github-token: ${{ github.token }} +# tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.fed37 +# push: true - - name: Build and push the Debian 10 image - id: docker_build_deb10 + - name: Build and push the Debian 11 image + id: docker_build_deb11 uses: docker/build-push-action@v2 with: context: ./docker - file: "./docker/Dockerfile-Debian10" + file: "./docker/Dockerfile-Debian11" build-args: | "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" allow: network.host github-token: ${{ github.token }} - tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.deb10 + tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.deb11 push: true - name: Build and push the ArchLinux image diff --git a/Makefile b/Makefile index ff2eedf..1d3e7d1 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ endef export BROWSER_PYSCRIPT BROWSER := python -c "$$BROWSER_PYSCRIPT" -.PHONY: install uninstall build-gzip build-rpm test 
test-all pydoc coverage manual dist release clean clean-dist clean-doc clean-manual clean-build clean-pyc clean-test clean-rpm +.PHONY: install uninstall build-gzip build-rpm test test-all docker-build pydoc coverage manual dist release clean clean-dist clean-doc clean-manual clean-build clean-pyc clean-test clean-rpm install: if [ ! -e ".$(PYTHON)" ]; then ~/.pyenv/versions/$(PYTHON)/bin/python3 -m venv .$(PYTHON); fi @@ -180,6 +180,13 @@ app: clean-wheels STRETCH_OFF=True .$(PYTHON)/bin/python setup.py bdist_wheel -d ./wheels; \ .$(PYTHON)/bin/pex subaligner==$(SUBALIGNER_VERSION) --repo=./wheels --platform $(PLATFORM) --no-pypi --no-build --python-shebang="/usr/bin/env python3" -e subaligner -o subaligner-$(PLATFORM).app; \ +docker-build: + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Ubuntu20 . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-ArchLinux . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-CentOS7 . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Debian11 . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Fedora37 . + docker-images: SUBALIGNER_VERSION=$(SUBALIGNER_VERSION) docker-compose -f ./docker/docker-compose.yml build diff --git a/docker/Dockerfile-Debian10 b/docker/Dockerfile-Debian11 similarity index 94% rename from docker/Dockerfile-Debian10 rename to docker/Dockerfile-Debian11 index d3fe362..1d91f42 100644 --- a/docker/Dockerfile-Debian10 +++ b/docker/Dockerfile-Debian11 @@ -1,5 +1,5 @@ # Subaligner Debian docker image -FROM debian:stable-20211011-slim +FROM debian:stable-20230202-slim ARG RELEASE_VERSION diff --git a/docker/Dockerfile-Fedora31 b/docker/Dockerfile-Fedora37 similarity index 89% rename from docker/Dockerfile-Fedora31 rename to docker/Dockerfile-Fedora37 index 3e2323f..e4fb24e 100644 --- a/docker/Dockerfile-Fedora31 +++ b/docker/Dockerfile-Fedora37 @@ -1,5 +1,5 @@ # Subaligner Fedora Docker Image -From fedora:31 +From fedora:37 ARG RELEASE_VERSION @@ -15,6 +15,8 @@ RUN ["/bin/bash", "-c", "dnf install -y dnf-utils &&\ dnf install -y espeak-ng &&\ ln -s /usr/lib64/libespeak-ng.so.1 /usr/lib64/libespeak.so &&\ dnf install -y libsndfile-devel &&\ + dnf install -y blas lapack blas-devel lapack-devel &&\ + dnf install -y gcc-c++ &&\ dnf install -y python3 &&\ dnf install -y gcc &&\ dnf install -y python3-wheel &&\ diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 424665b..82f0474 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -28,11 +28,11 @@ services: subaligner-debian10: build: context: ./ - dockerfile: Dockerfile-Debian10 - image: baxtree/subaligner:${SUBALIGNER_VERSION}.deb10 + dockerfile: Dockerfile-Debian11 + image: baxtree/subaligner:${SUBALIGNER_VERSION}.deb11 subaligner-fedora31: build: context: ./ - dockerfile: Dockerfile-Fedora31 - image: baxtree/subaligner:${SUBALIGNER_VERSION}.fed31 + dockerfile: Dockerfile-Fedora37 + image: baxtree/subaligner:${SUBALIGNER_VERSION}.fed37 diff --git a/subaligner/_version.py b/subaligner/_version.py index eba52d9..c4206c9 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.3.2" +__version__ = "0.3.3" diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py index 3122ff4..0a0481a 100644 --- a/subaligner/transcriber.py +++ b/subaligner/transcriber.py @@ -55,7 +55,7 @@ def 
transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle try: audio = whisper.load_audio(audio_file_path) self.__LOGGER.debug("Start transcribing the audio...") - result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang], logprob_threshold=-1.2, no_speech_threshold=0.16) + result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang]) self.__LOGGER.info("Finished transcribing the audio") srt_str = "" for i, segment in enumerate(result["segments"], start=1): From 8d941ddadfe97cc71bb22151802d3f7fd2bb93e8 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 12 Jun 2023 09:27:27 +0100 Subject: [PATCH 16/20] add the ubuntu 22 image --- .github/workflows/dockerhub.yml | 37 +++++++++++++------ Makefile | 3 +- ...ockerfile-Fedora37 => Dockerfile-Fedora34} | 2 +- docker/Dockerfile-Ubuntu22 | 19 ++++++++++ docker/docker-compose.yml | 14 +++++-- 5 files changed, 57 insertions(+), 18 deletions(-) rename docker/{Dockerfile-Fedora37 => Dockerfile-Fedora34} (98%) create mode 100644 docker/Dockerfile-Ubuntu22 diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index 93792dd..6283144 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -60,18 +60,31 @@ jobs: tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.u20 push: true -# - name: Build and push the Fedora 37 image -# id: docker_build_fed37 -# uses: docker/build-push-action@v2 -# with: -# context: ./docker -# file: "./docker/Dockerfile-Fedora37" -# build-args: | -# "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" -# allow: network.host -# github-token: ${{ github.token }} -# tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.fed37 -# push: true + - name: Build and push the Ubuntu 22 image + id: docker_build_u22 + uses: docker/build-push-action@v2 + with: + context: ./docker + file: "./docker/Dockerfile-Ubuntu22" + build-args: | + "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" + allow: network.host + github-token: ${{ github.token }} + tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.u22 + push: true + + - name: Build and push the Fedora 34 image + id: docker_build_fed34 + uses: docker/build-push-action@v2 + with: + context: ./docker + file: "./docker/Dockerfile-Fedora34" + build-args: | + "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" + allow: network.host + github-token: ${{ github.token }} + tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.fed34 + push: true - name: Build and push the Debian 11 image id: docker_build_deb11 diff --git a/Makefile b/Makefile index 1d3e7d1..37329ff 100644 --- a/Makefile +++ b/Makefile @@ -182,10 +182,11 @@ app: clean-wheels docker-build: docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Ubuntu20 . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Ubuntu22 . docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-ArchLinux . docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-CentOS7 . docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Debian11 . - docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Fedora37 . + docker build --build-arg RELEASE_VERSION=$(SUBALIGNER_VERSION) -f docker/Dockerfile-Fedora34 . 
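The earlier transcriber revert above drops the hard-coded `logprob_threshold=-1.2` and `no_speech_threshold=0.16`, so transcription falls back to Whisper's stock decoding defaults. As a rough sketch of how those options are normally passed to openai-whisper (the model size and audio path below are placeholders, not values used by subaligner):

    import whisper

    model = whisper.load_model("tiny")
    audio = whisper.load_audio("sample.wav")  # placeholder path
    result = model.transcribe(
        audio,
        task="transcribe",
        language="en",
        logprob_threshold=-1.0,   # default: a lower average log-probability marks the decode as failed
        no_speech_threshold=0.6,  # default: a higher no-speech probability marks the segment as silent
    )
    print(result["text"])
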
docker-images: SUBALIGNER_VERSION=$(SUBALIGNER_VERSION) docker-compose -f ./docker/docker-compose.yml build diff --git a/docker/Dockerfile-Fedora37 b/docker/Dockerfile-Fedora34 similarity index 98% rename from docker/Dockerfile-Fedora37 rename to docker/Dockerfile-Fedora34 index e4fb24e..952058a 100644 --- a/docker/Dockerfile-Fedora37 +++ b/docker/Dockerfile-Fedora34 @@ -1,5 +1,5 @@ # Subaligner Fedora Docker Image -From fedora:37 +From fedora:34 ARG RELEASE_VERSION diff --git a/docker/Dockerfile-Ubuntu22 b/docker/Dockerfile-Ubuntu22 new file mode 100644 index 0000000..23974e5 --- /dev/null +++ b/docker/Dockerfile-Ubuntu22 @@ -0,0 +1,19 @@ +# Subaligner Ubuntu 22 Docker Image +FROM ubuntu:22.04 + +ARG RELEASE_VERSION + +ENV RELEASE_VERSION=${RELEASE_VERSION} +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=Europe/London + +RUN ["/bin/bash", "-c", "apt-get -y update &&\ + apt-get -y install ffmpeg &&\ + apt-get -y install espeak libespeak1 libespeak-dev espeak-data &&\ + apt-get -y install libsndfile-dev &&\ + apt-get -y install python3-dev &&\ + apt-get -y install python3-tk &&\ + apt-get -y install python3-pip &&\ + python3 -m pip install --upgrade pip &&\ + python3 -m pip install \"subaligner==${RELEASE_VERSION}\" &&\ + python3 -m pip install \"subaligner[harmony]==${RELEASE_VERSION}\""] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 82f0474..c68b25b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -19,20 +19,26 @@ services: dockerfile: Dockerfile-Ubuntu20 image: baxtree/subaligner:${SUBALIGNER_VERSION}.u20 + subaligner-ubuntu22: + build: + context: ./ + dockerfile: Dockerfile-Ubuntu22 + image: baxtree/subaligner:${SUBALIGNER_VERSION}.u22 + subaligner-archlinux: build: context: ./ dockerfile: Dockerfile-ArchLinux image: baxtree/subaligner:${SUBALIGNER_VERSION}.arch - subaligner-debian10: + subaligner-debian11: build: context: ./ dockerfile: Dockerfile-Debian11 image: baxtree/subaligner:${SUBALIGNER_VERSION}.deb11 - subaligner-fedora31: + subaligner-fedora34: build: context: ./ - dockerfile: Dockerfile-Fedora37 - image: baxtree/subaligner:${SUBALIGNER_VERSION}.fed37 + dockerfile: Dockerfile-Fedora34 + image: baxtree/subaligner:${SUBALIGNER_VERSION}.fed34 From 5194dab978d327377a16dfeae18310c4f83ac8c0 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 19 Jun 2023 09:29:52 +0100 Subject: [PATCH 17/20] enable the progress bar during transcription --- subaligner/transcriber.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py index 0a0481a..658e823 100644 --- a/subaligner/transcriber.py +++ b/subaligner/transcriber.py @@ -54,8 +54,9 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle audio_file_path = self.__media_helper.extract_audio(video_file_path, True, 16000) try: audio = whisper.load_audio(audio_file_path) - self.__LOGGER.debug("Start transcribing the audio...") - result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang]) + self.__LOGGER.info("Start transcribing the audio...") + verbose = False if Logger.VERBOSE and not Logger.QUIET else None + result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang], verbose=verbose) self.__LOGGER.info("Finished transcribing the audio") srt_str = "" for i, segment in enumerate(result["segments"], start=1): From c0ad422edc21932abe885c84e7e82d0c238f5210 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 7 Jul 2023 18:32:30 +0100 Subject: [PATCH 18/20] 
upgrade whisper and add aarch64 requirements --- Makefile | 10 ++++-- Pipfile | 6 +--- requirements-aarch64.txt | 71 ++++++++++++++++++++++++++++++++++++++++ requirements-llm.txt | 2 +- requirements.txt | 3 -- setup.py | 12 ++++++- subaligner/_version.py | 2 +- 7 files changed, 93 insertions(+), 13 deletions(-) create mode 100644 requirements-aarch64.txt diff --git a/Makefile b/Makefile index 37329ff..8c6a7a6 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,13 @@ endif ifdef PLATFORM PLATFORM := $(PLATFORM) else -PLATFORM := linux-x86_64-cp-37-cp37 +PLATFORM := linux-x86_64-cp-38-cp38 +endif + +ifdef PYTHON_TAG +PYTHON_TAG := $(PYTHON_TAG) +else +PYTHON_TAG := py38 endif SUBALIGNER_VERSION := $(SUBALIGNER_VERSION) @@ -153,7 +159,7 @@ test-dist: dist: clean-dist test-dist cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - .$(PYTHON)/bin/python setup.py sdist bdist_wheel bdist_egg + .$(PYTHON)/bin/python setup.py sdist bdist_wheel --python-tag=$(PYTHON_TAG) release: .$(PYTHON)/bin/twine upload dist/* diff --git a/Pipfile b/Pipfile index 5e4dffc..1c37619 100644 --- a/Pipfile +++ b/Pipfile @@ -38,8 +38,6 @@ dask = ">=2021.10.0,<2022.1.0" decorator = "==4.3.0" distributed = "==1.13.0" filelock = "<4.0.0" -google-auth = "==1.27.0" -google-auth-oauthlib = "==0.4.2" google-pasta = "~=0.2" graphviz = "==0.8.3" HeapDict = "==1.0.0" @@ -61,7 +59,7 @@ mccabe = "==0.6.1" numba = ">=0.50.0" numpy = "<1.24.0" oauthlib = "==3.1.0" -openai-whisper = "==20230124" +openai-whisper = "==20230314" pbr = "==4.0.2" pluggy = "==0.13.1" psutil = "==5.6.7" @@ -81,8 +79,6 @@ pystack-debugger = "==0.8.0" python-dateutil = "==2.7.2" pytz = "==2018.4" PyYAML = ">=4.2b1" -requests = "~=2.25.1" -requests-oauthlib = "==1.3.0" rsa = "==4.7" scipy = "<=1.8.1" scikit-learn = ">=0.19.1" diff --git a/requirements-aarch64.txt b/requirements-aarch64.txt new file mode 100644 index 0000000..71e5d49 --- /dev/null +++ b/requirements-aarch64.txt @@ -0,0 +1,71 @@ +astor==0.7.1 +beautifulsoup4<4.9.0 +bleach==3.3.0 +cachetools==3.1.1 +captionstransformer~=1.2.1 +cchardet==2.1.7 +certifi==2019.11.28 +chardet==3.0.4 +click==5.1 +cloudpickle~=1.6.0 +cycler==0.10.0 +Cython~=0.29.22 +dask>=2021.10.0,<2022.1.0 +decorator==4.3.0 +distributed==1.13.0 +filelock<4.0.0 +google-auth-oauthlib==0.4.2 +google-pasta~=0.2 +graphviz==0.8.3 +HeapDict==1.0.0 +h5py<=4.0.0 +html5lib==1.0b9 +hyperopt==0.2.4 +idna==2.8 +isort==4.3.4 +joblib>=1.2.0 +keras~=2.12.0 +kiwisolver==1.0.1 +lazy-object-proxy==1.4.3 +le-pycaption==2.2.0a1 +librosa<0.10.0 +locket==0.2.0 +Markdown==2.6.11 +mccabe==0.6.1 +networkx>=2.5.1 +numba>=0.50.0 +numpy<1.24.0 +oauthlib==3.1.0 +pbr==4.0.2 +pluggy==0.13.1 +protobuf<4.0 +psutil==5.6.7 +py==1.10.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.7 +pycountry~=20.7.3 +pydot==1.2.4 +pydot-ng==1.0.0 +pydotplus==2.0.2 +pyprof2calltree==1.4.3 +pysrt==1.1.1 +pysubs2<=1.4.2 +pystack-debugger==0.8.0 +pytz==2018.4 +PyYAML>=4.2b1 +rsa==4.7 +scipy<=1.8.1 +scikit-learn<1.2.0 +setuptools>=41.0.0 +six~=1.15.0 +tblib==1.3.2 +tensorflow-macos~=2.12.0 +tensorflow-metal~=0.8.0 +termcolor==1.1.0 +toml==0.10.0 +toolz==0.9.0 +tornado==5.1.0 +urllib3~=1.26.5 +Werkzeug>=0.15.3 +zict==0.1.3 +zipp==0.6.0 \ No newline at end of file diff --git a/requirements-llm.txt b/requirements-llm.txt index ec609ca..0e0de7e 100644 --- a/requirements-llm.txt +++ b/requirements-llm.txt @@ -1,4 +1,4 @@ sentencepiece~=0.1.95 torch<1.13.0 transformers<4.27.0 -openai-whisper==20230124 \ No newline at end of file +openai-whisper==20230314 diff --git a/requirements.txt 
b/requirements.txt index 489686f..01611cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,6 @@ dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 filelock<4.0.0 -google-auth-oauthlib==0.4.2 google-pasta~=0.2 graphviz==0.8.3 HeapDict==1.0.0 @@ -53,8 +52,6 @@ pysubs2<=1.4.2 pystack-debugger==0.8.0 pytz==2018.4 PyYAML>=4.2b1 -requests<3.0.0 -requests-oauthlib==1.3.0 rsa==4.7 scipy<=1.8.1 scikit-learn<1.2.0 diff --git a/setup.py b/setup.py index 1220641..3b6545d 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import os +import platform from setuptools import setup @@ -11,6 +12,13 @@ with open("README.md") as readme_file: readme = readme_file.read() +if platform.machine() == "arm64": + with open("requirements-aarch64.txt") as requirements_file: + requirements = requirements_file.read().splitlines()[::-1] +else: + with open("requirements.txt") as requirements_file: + requirements = requirements_file.read().splitlines()[::-1] + with open("requirements.txt") as requirements_file: requirements = requirements_file.read().splitlines()[::-1] @@ -52,6 +60,7 @@ long_description=readme + "\n\n", long_description_content_type='text/markdown', python_requires=">=3.8", + wheel=True, package_dir={"subaligner": "subaligner"}, packages=[ "subaligner", @@ -92,4 +101,5 @@ "subaligner_convert=subaligner.subaligner_convert.__main__:main", "subaligner_train=subaligner.subaligner_train.__main__:main", "subaligner_tune=subaligner.subaligner_tune.__main__:main", - ]}) + ] + }) diff --git a/subaligner/_version.py b/subaligner/_version.py index c4206c9..4103c8a 100644 --- a/subaligner/_version.py +++ b/subaligner/_version.py @@ -1,2 +1,2 @@ """The semver for the current release.""" -__version__ = "0.3.3" +__version__ = "0.3.4" From c400a14cfee9d1b6ff6a680cd7c3cce6c2d10bf6 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 10 Jul 2023 18:36:06 +0100 Subject: [PATCH 19/20] improve setup and deprecate dependiencies --- .github/workflows/ci-pipeline.yml | 3 +- Makefile | 8 +---- Pipfile | 8 ++--- README.md | 2 +- docker/Dockerfile-Debian11 | 4 +++ pyproject.toml | 2 ++ ...ents-aarch64.txt => requirements-arm64.txt | 4 --- requirements.txt | 9 ++--- setup.py | 35 ++++++++++++++----- subaligner/__init__.py | 5 +++ subaligner/utils.py | 6 ++-- tests/subaligner/test_utils.py | 4 +-- 12 files changed, 50 insertions(+), 40 deletions(-) create mode 100644 pyproject.toml rename requirements-aarch64.txt => requirements-arm64.txt (93%) diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index 65bfb67..147e736 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -1,4 +1,4 @@ -name: ci pipeline +name: CI Pipeline on: push: @@ -28,6 +28,7 @@ jobs: sudo apt-get -y install espeak libespeak1 libespeak-dev espeak-data sudo apt-get -y install libsndfile-dev python -m pip install --upgrade pip + python -m pip install --upgrade setuptools wheel cat requirements.txt | xargs -L 1 pip install cat requirements-stretch.txt | xargs -L 1 pip install cat requirements-llm.txt | xargs -L 1 pip install diff --git a/Makefile b/Makefile index 8c6a7a6..2463e21 100644 --- a/Makefile +++ b/Makefile @@ -10,12 +10,6 @@ else PLATFORM := linux-x86_64-cp-38-cp38 endif -ifdef PYTHON_TAG -PYTHON_TAG := $(PYTHON_TAG) -else -PYTHON_TAG := py38 -endif - SUBALIGNER_VERSION := $(SUBALIGNER_VERSION) TRIGGER_URL := ${TRIGGER_URL} @@ -159,7 +153,7 @@ test-dist: dist: clean-dist test-dist cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip 
install; \ - .$(PYTHON)/bin/python setup.py sdist bdist_wheel --python-tag=$(PYTHON_TAG) + .$(PYTHON)/bin/python setup.py sdist bdist_wheel release: .$(PYTHON)/bin/twine upload dist/* diff --git a/Pipfile b/Pipfile index 1c37619..1a4f1a7 100644 --- a/Pipfile +++ b/Pipfile @@ -29,11 +29,10 @@ bleach = "==3.3.0" cachetools = "==3.1.1" captionstransformer = "~=1.2.1" certifi = "==2019.11.28" -chardet = "==3.0.4" +chardet = "~=3.0.4" click = "==5.1" cloudpickle = "==0.5.3" cycler = "==0.10.0" -Cython = "~=0.29.22" dask = ">=2021.10.0,<2022.1.0" decorator = "==4.3.0" distributed = "==1.13.0" @@ -49,8 +48,6 @@ isort = "==4.3.4" joblib = ">=1.2.0" Keras-Applications = ">=1.0.8" Keras-Preprocessing = ">=1.0.9" -kiwisolver = "==1.0.1" -lazy-object-proxy = "==1.4.3" le-pycaption = "==2.2.0a1" librosa = "<0.10.0" locket = "==0.2.0" @@ -61,8 +58,8 @@ numpy = "<1.24.0" oauthlib = "==3.1.0" openai-whisper = "==20230314" pbr = "==4.0.2" +pkgconfig = "~=1.5.5" pluggy = "==0.13.1" -psutil = "==5.6.7" py = "==1.10.0" pyasn1 = "==0.4.8" pyasn1-modules = "==0.2.7" @@ -83,7 +80,6 @@ rsa = "==4.7" scipy = "<=1.8.1" scikit-learn = ">=0.19.1" sentencepiece = "~=0.1.95" -setuptools = ">=41.0.0" six = "~=1.15.0" tblib = "==1.3.2" tensorflow = ">=1.15.5,<2.12" diff --git a/README.md b/README.md index 71d9207..c34e715 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ $ brew install ffmpeg ## Basic Installation ``` -$ pip install -U pip && pip install -U setuptools +$ pip install -U pip && pip install -U setuptools wheel $ pip install subaligner ``` or install from source: diff --git a/docker/Dockerfile-Debian11 b/docker/Dockerfile-Debian11 index 1d91f42..475ff82 100644 --- a/docker/Dockerfile-Debian11 +++ b/docker/Dockerfile-Debian11 @@ -11,9 +11,13 @@ RUN ["/bin/bash", "-c", "apt -y update &&\ apt -y install ffmpeg &&\ apt -y install espeak libespeak1 libespeak-dev espeak-data &&\ apt -y install libsndfile-dev &&\ + apt -y install libblas-dev liblapack-dev &&\ apt -y install python3-dev &&\ apt -y install python3-tk &&\ apt -y install python3-pip &&\ + apt -y install python3-venv &&\ + python3 -m venv .venv &&\ + source .venv/bin/activate &&\ python3 -m pip install --upgrade pip &&\ python3 -m pip install \"subaligner==${RELEASE_VERSION}\" &&\ python3 -m pip install \"subaligner[harmony]==${RELEASE_VERSION}\""] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..eb56957 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "Cython"] \ No newline at end of file diff --git a/requirements-aarch64.txt b/requirements-arm64.txt similarity index 93% rename from requirements-aarch64.txt rename to requirements-arm64.txt index 71e5d49..92ff604 100644 --- a/requirements-aarch64.txt +++ b/requirements-arm64.txt @@ -3,13 +3,11 @@ beautifulsoup4<4.9.0 bleach==3.3.0 cachetools==3.1.1 captionstransformer~=1.2.1 -cchardet==2.1.7 certifi==2019.11.28 chardet==3.0.4 click==5.1 cloudpickle~=1.6.0 cycler==0.10.0 -Cython~=0.29.22 dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 @@ -25,8 +23,6 @@ idna==2.8 isort==4.3.4 joblib>=1.2.0 keras~=2.12.0 -kiwisolver==1.0.1 -lazy-object-proxy==1.4.3 le-pycaption==2.2.0a1 librosa<0.10.0 locket==0.2.0 diff --git a/requirements.txt b/requirements.txt index 01611cd..167d439 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,13 +3,11 @@ beautifulsoup4<4.9.0 bleach==3.3.0 cachetools==3.1.1 captionstransformer~=1.2.1 -cchardet==2.1.7 certifi==2019.11.28 -chardet==3.0.4 +chardet~=3.0.4 click==5.1 
cloudpickle~=1.6.0 cycler==0.10.0 -Cython~=0.29.22 dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 @@ -25,8 +23,6 @@ isort==4.3.4 joblib>=1.2.0 Keras-Applications>=1.0.8 Keras-Preprocessing>=1.0.9 -kiwisolver==1.0.1 -lazy-object-proxy==1.4.3 le-pycaption==2.2.0a1 librosa<0.10.0 locket==0.2.0 @@ -37,8 +33,8 @@ numba>=0.50.0 numpy<1.24.0 oauthlib==3.1.0 pbr==4.0.2 +pkgconfig~=1.5.5 pluggy==0.13.1 -psutil==5.6.7 py==1.10.0 pyasn1==0.4.8 pyasn1-modules==0.2.7 @@ -55,7 +51,6 @@ PyYAML>=4.2b1 rsa==4.7 scipy<=1.8.1 scikit-learn<1.2.0 -setuptools>=41.0.0 six~=1.15.0 tblib==1.3.2 tensorflow>=1.15.5,<2.13 diff --git a/setup.py b/setup.py index 3b6545d..b84e1a4 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- import os -import platform - +import sys +from platform import architecture, machine from setuptools import setup +from wheel.bdist_wheel import bdist_wheel with open(os.path.join(os.getcwd(), "subaligner", "_version.py")) as f: exec(f.read()) @@ -12,16 +13,13 @@ with open("README.md") as readme_file: readme = readme_file.read() -if platform.machine() == "arm64": - with open("requirements-aarch64.txt") as requirements_file: +if machine() == "arm64": + with open("requirements-arm64.txt") as requirements_file: requirements = requirements_file.read().splitlines()[::-1] else: with open("requirements.txt") as requirements_file: requirements = requirements_file.read().splitlines()[::-1] -with open("requirements.txt") as requirements_file: - requirements = requirements_file.read().splitlines()[::-1] - with open("requirements-stretch.txt") as stretch_requirements_file: stretch_requirements = stretch_requirements_file.read().splitlines()[::-1] @@ -43,6 +41,24 @@ "llm": llm_requirements, } +architecture = architecture()[0] if sys.platform == "win32" else machine() + + +class bdist_wheel_local(bdist_wheel): + + def get_tag(self): + python = f"py{sys.version_info.major}{sys.version_info.minor}" + if sys.platform == "darwin" and architecture == "arm64": + os_arch = "macosx_11_0_arm64" + elif sys.platform == "win32": + os_arch = "win32" if architecture == "32bit" else "win_amd64" + # elif sys.platform == "linux": + # os_arch = f"manylinux_2_17_{architecture}" + else: + os_arch = "any" + return python, "none", os_arch + + setup(name="subaligner", version=__version__, author="Xi Bai", @@ -58,7 +74,7 @@ url="https://subaligner.readthedocs.io/en/latest/", description="Automatically synchronize and translate subtitles, or create new ones by transcribing, using pre-trained DNNs, Forced Alignments and Transformers.", long_description=readme + "\n\n", - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", python_requires=">=3.8", wheel=True, package_dir={"subaligner": "subaligner"}, @@ -102,4 +118,5 @@ "subaligner_train=subaligner.subaligner_train.__main__:main", "subaligner_tune=subaligner.subaligner_tune.__main__:main", ] - }) + }, + cmdclass={"bdist_wheel": bdist_wheel_local}) diff --git a/subaligner/__init__.py b/subaligner/__init__.py index ae79fcb..70d02fe 100644 --- a/subaligner/__init__.py +++ b/subaligner/__init__.py @@ -1,5 +1,6 @@ import os import warnings +import logging import multiprocessing as mp from ._version import __version__ @@ -10,3 +11,7 @@ mp.set_start_method("spawn", force=True) os.environ["KMP_WARNINGS"] = "0" + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +os.environ["TF_CPP_MIN_VLOG_LEVEL"] = "0" +logging.getLogger("tensorflow").disabled = True diff --git a/subaligner/utils.py b/subaligner/utils.py index 
fa50d0f..1a771dd 100644 --- a/subaligner/utils.py +++ b/subaligner/utils.py @@ -3,7 +3,7 @@ import pysubs2 import requests import shutil -import cchardet +import chardet import shlex import pycountry @@ -595,9 +595,9 @@ def detect_encoding(subtitle_file_path: str) -> str: # and hence this less memory-efficient solution: raw = b"".join(file.readlines()) - detected = cchardet.detect(raw) + detected = chardet.detect(raw) detected = detected or {} - return detected["encoding"] if "encoding" in detected else None + return detected["encoding"] if "encoding" in detected and detected["encoding"] is not None else "utf-8" @staticmethod def get_file_root_and_extension(file_path: str) -> Tuple[str, str]: diff --git a/tests/subaligner/test_utils.py b/tests/subaligner/test_utils.py index da6689c..b4f9cbd 100644 --- a/tests/subaligner/test_utils.py +++ b/tests/subaligner/test_utils.py @@ -281,8 +281,8 @@ def test_contains_embedded_subtitle(self): self.assertFalse(Undertest.contains_embedded_subtitles(self.mp4_file_path)) def test_detect_encoding(self): - self.assertEqual("ASCII", Undertest.detect_encoding(self.real_srt_path)) - self.assertEqual("UTF-8", Undertest.detect_encoding(self.mkv_file_path)) + self.assertEqual("ascii", Undertest.detect_encoding(self.real_srt_path)) + self.assertEqual("utf-8", Undertest.detect_encoding(self.mkv_file_path)) def test_get_file_root_and_extension(self): root, extension = Undertest.get_file_root_and_extension("/path/to/root.ext1.ext2") From cc5cd13c1c11ff12103a1d9e8aeeb10065c80d6a Mon Sep 17 00:00:00 2001 From: baxtree Date: Thu, 13 Jul 2023 09:45:15 +0100 Subject: [PATCH 20/20] retire the Debian image --- .github/workflows/dockerhub.yml | 13 ------------- docker/Dockerfile-Debian11 | 1 + 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index 6283144..4a02ec9 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -86,19 +86,6 @@ jobs: tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.fed34 push: true - - name: Build and push the Debian 11 image - id: docker_build_deb11 - uses: docker/build-push-action@v2 - with: - context: ./docker - file: "./docker/Dockerfile-Debian11" - build-args: | - "RELEASE_VERSION=${{ steps.tag.outputs.TAG }}" - allow: network.host - github-token: ${{ github.token }} - tags: baxtree/subaligner:${{ steps.tag.outputs.TAG }}.deb11 - push: true - - name: Build and push the ArchLinux image id: docker_build_arch uses: docker/build-push-action@v2 diff --git a/docker/Dockerfile-Debian11 b/docker/Dockerfile-Debian11 index 475ff82..a1fdf84 100644 --- a/docker/Dockerfile-Debian11 +++ b/docker/Dockerfile-Debian11 @@ -8,6 +8,7 @@ ENV RELEASE_VERSION=${RELEASE_VERSION} ENV TZ=Europe/London RUN ["/bin/bash", "-c", "apt -y update &&\ + apt -y install build-essential &&\ apt -y install ffmpeg &&\ apt -y install espeak libespeak1 libespeak-dev espeak-data &&\ apt -y install libsndfile-dev &&\