Skip to content

Commit

Permalink
Adiciona opção batch_size
Browse files Browse the repository at this point in the history
  • Loading branch information
matheusbach committed Sep 23, 2023
1 parent 3143c88 commit ad6594f
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
2 changes: 2 additions & 0 deletions legen.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@
"transcription_device = 'auto' #@param [\"auto\", \"cpu\", \"cuda\"]\n",
"transcription_model = 'large' #@param [\"tiny\", \"small\", \"medium\", \"large\"]\n",
"compute_type = 'default' # @param [\"int8\", \"int16\", \"float16\", \"float32\"]\n",
"batch_size = 12 # @param {type: \"number\"}\n",
"transcription_input_lang = 'auto detect' #@param [\"auto detect\", \"af\", \"am\", \"ar\", \"as\", \"az\", \"ba\", \"be\", \"bg\", \"bn\", \"bo\", \"br\", \"bs\", \"ca\", \"cs\", \"cy\", \"da\", \"de\", \"el\", \"en\", \"es\", \"et\", \"eu\", \"fa\", \"fi\", \"fo\", \"fr\", \"gl\", \"gu\", \"ha\", \"haw\", \"he\", \"hi\", \"hr\", \"ht\", \"hu\", \"hy\", \"id\", \"is\", \"it\", \"ja\", \"jw\", \"ka\", \"kk\", \"km\", \"kn\", \"ko\", \"la\", \"lb\", \"ln\", \"lo\", \"lt\", \"lv\", \"mg\", \"mi\", \"mk\", \"ml\", \"mn\", \"mr\", \"ms\", \"mt\", \"my\", \"ne\", \"nl\", \"nn\", \"no\", \"oc\", \"pa\", \"pl\", \"ps\", \"pt\", \"ro\", \"ru\", \"sa\", \"sd\", \"si\", \"sk\", \"sl\", \"sn\", \"so\", \"sq\", \"sr\", \"su\", \"sv\", \"sw\", \"ta\", \"te\", \"tg\", \"th\", \"tk\", \"tl\", \"tr\", \"tt\", \"uk\", \"ur\", \"uz\", \"vi\", \"yi\", \"yo\", \"zh\"]\n",
"\n",
"#@markdown ---\n",
Expand Down Expand Up @@ -193,6 +194,7 @@
"query += f\" --dev {transcription_device}\"\n",
"query += f\" --model {transcription_model}\"\n",
"query += f\" --compute_type {compute_type}\"\n",
"query += f\" --batch_size {batch_size}\"\n",
"query += f\" --input_lang {transcription_input_lang}\" if transcription_input_lang != \"auto detect\" else \"\"\n",
"query += f\" --lang {target_language_code}\"\n",
"query += f\" -c:v {video_codec}\" + (\"\" if video_hardware_api == \"none\" else f\"_{video_hardware_api}\" if video_hardware_api != \"auto\" else \"_nvenc\" if torch.cuda.is_available() else \"\")\n",
Expand Down
6 changes: 4 additions & 2 deletions legen.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import file_utils
import translate_utils

version = "v0.14"
version = "v0.14.2"

# Terminal colors
default = "\033[1;0m"
Expand Down Expand Up @@ -50,6 +50,8 @@
help="Dispositivo para rodar a transcrição pelo Whisper. [cpu, cuda, auto]. (default: auto)")
parser.add_argument("--compute_type", type=str, default="default",
help="Quantization for the neural network. Ex: float32, float16, int8, ...")
# --batch_size is forwarded to the WhisperX transcription call. The default is
# a real int so it matches the declared type=int directly, instead of relying
# on argparse converting a string default at parse time.
parser.add_argument("--batch_size", type=int, default=4,
                    help="The higher the value, the faster the processing will be. If you have low RAM or have buggy subtitles, reduce this value. Works only using whisperX. (default: 4)")
parser.add_argument("--lang", type=str, default="pt",
help="Idioma para o qual as legendas devem ser traduzidas. Language equals to source video skip translation (default: pt)")
parser.add_argument("--input_lang", type=str, default="auto",
Expand Down Expand Up @@ -200,7 +202,7 @@
print(
f"{wblue}Transcribing{default} with {gray}WhisperX{default}")
whisperx_utils.transcribe_audio(
whisper_model, audio_extracted.getpath(), transcribed_srt_temp.getpath(), audio_language, device=torch_device)
whisper_model, audio_extracted.getpath(), transcribed_srt_temp.getpath(), audio_language, device=torch_device, batch_size=args.batch_size)
else:
print(
f"{wblue}Transcribing{default} with {gray}Whisper{default}")
Expand Down
5 changes: 1 addition & 4 deletions whisperx_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
#import faster_whisper
#import numpy as np

batch_size = 4 # reduce if low on GPU mem


def transcribe_audio(model: whisperx.asr.WhisperModel, audio_path: Path, srt_path: Path, lang: str = None, disable_fp16: bool = False, device: str = "cpu"):
def transcribe_audio(model: whisperx.asr.WhisperModel, audio_path: Path, srt_path: Path, lang: str = None, device: str = "cpu", batch_size: int = 4):
audio = whisperx.load_audio(file=audio_path.as_posix())

# Transcribe
Expand Down

0 comments on commit ad6594f

Please sign in to comment.