Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows compatibility #136

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions bins/calc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,16 @@ def calc_metric(
continue

audios_ref = []
import os

audios_deg = []

files = glob(ref_dir + "/*.wav")
files = glob(os.path.join(ref_dir, "*.wav"))

for file in files:
audios_ref.append(file)
uid = file.split("/")[-1].split(".wav")[0]
file_gt = deg_dir + "/{}.wav".format(uid)
uid = os.path.splitext(os.path.basename(file))[0]
file_gt = os.path.join(deg_dir, f"{uid}.wav")
audios_deg.append(file_gt)

if metric in ["v_uv_f1"]:
Expand Down
4 changes: 2 additions & 2 deletions bins/svc/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,14 +227,14 @@ def main():
audio_list = []
for suffix in ["wav", "flac", "mp3"]:
audio_list += glob.glob(
os.path.join(source_audio_dir, "**/*.{}".format(suffix)), recursive=True
os.path.join(source_audio_dir, "**", "*.{}".format(suffix)), recursive=True
)
print("There are {} source audios: ".format(len(audio_list)))

# Infer for every file as dataset
output_root_path = args.output_dir
for audio_path in tqdm(audio_list):
audio_name = audio_path.split("/")[-1].split(".")[0]
audio_name = os.path.splitext(os.path.basename(audio_path))[0]
args.output_dir = os.path.join(output_root_path, audio_name)
print("\n{}\nConversion for {}...\n".format("*" * 10, audio_name))

Expand Down
16 changes: 8 additions & 8 deletions egs/tts/VALLE/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ Specify the `processed_dir` and the `log_dir` and for saving the processed data
Run `run.py` as the preprocessing stage (set `--stage 1`):

```bash
sh egs/tts/VALLE/run.sh --stage 1
python egs/tts/VALLE/run.py --stage 1
```

> **NOTE:** The `CUDA_VISIBLE_DEVICES` is set as `"0"` in default. You can change it when running `run.sh` by specifying such as `--gpu "1"`.
> **NOTE:** `CUDA_VISIBLE_DEVICES` is set to `"0"` by default. You can change it when running `run.py` by specifying, for example, `--gpu "1"`.


## 3. Training
Expand All @@ -74,31 +74,31 @@ We provide the default hyparameters in the `exp_config.json`. They can work on s

### Run

Run the `run.sh` as the training stage (set `--stage 2`). Specify a experimental name to run the following command. The tensorboard logs and checkpoints will be saved in `Amphion/ckpts/tts/[YourExptName]`.
Run `run.py` as the training stage (set `--stage 2`). Specify an experiment name when running the following command. The tensorboard logs and checkpoints will be saved in `Amphion/ckpts/tts/[YourExptName]`.

Specifically, VALL-E needs to train an autoregressive (AR) model and then a non-autoregressive (NAR) model. So, you can set `--model_train_stage 1` to train the AR model, and set `--model_train_stage 2` to train the NAR model, where `--ar_model_ckpt_dir` should be set to the checkpoint path of the trained AR model.


To train an AR model, just run:

```bash
sh egs/tts/VALLE/run.sh --stage 2 --model_train_stage 1 --name [YourExptName]
python egs/tts/VALLE/run.py --stage 2 --model_train_stage 1 --name [YourExptName]
```

Train a NAR model, just run:
```bash
sh egs/tts/VALLE/run.sh --stage 2 --model_train_stage 2 --ar_model_ckpt_dir [ARModelPath] --name [YourExptName]
python egs/tts/VALLE/run.py --stage 2 --model_train_stage 2 --ar_model_ckpt_dir [ARModelPath] --name [YourExptName]
```
<!-- > **NOTE:** To train a NAR model, `--checkpoint_path` should be set as the ckeckpoint path to the trained AR model. -->

> **NOTE:** The `CUDA_VISIBLE_DEVICES` is set as `"0"` in default. You can change it when running `run.sh` by specifying such as `--gpu "0,1,2,3"`.
> **NOTE:** `CUDA_VISIBLE_DEVICES` is set to `"0"` by default. You can change it when running `run.py` by specifying, for example, `--gpu "0,1,2,3"`.


## 4. Inference

### Configuration

For inference, you need to specify the following configurations when running `run.sh`:
For inference, you need to specify the following configurations when running `run.py`:



Expand All @@ -117,7 +117,7 @@ For inference, you need to specify the following configurations when running `ru
For example, if you want to generate a single clip of speech, just run:

```bash
sh egs/tts/VALLE/run.sh --stage 3 --gpu "0" \
python egs/tts/VALLE/run.py --stage 3 --gpu "0" \
--infer_expt_dir Amphion/ckpts/tts/[YourExptName] \
--infer_output_dir Amphion/ckpts/tts/[YourExptName]/result \
--infer_mode "single" \
Expand Down
105 changes: 105 additions & 0 deletions egs/tts/VALLE/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import os
import sys
import subprocess
from argparse import ArgumentParser


# Repository root: this script sits three directory levels below it.
work_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))


def fail(message):
    """Print *message* and terminate the script with exit status 1."""
    print(message)
    sys.exit(1)


def build_parser():
    """Return the command-line parser for this recipe script."""
    parser = ArgumentParser()
    parser.add_argument('-c', '--config', help='Experimental Configuration File')
    parser.add_argument('-n', '--name', help='Experimental Name')
    parser.add_argument('-s', '--stage', type=int, help='Running Stage')
    parser.add_argument('--gpu', type=str, help='Visible GPU machines')
    parser.add_argument('--model_train_stage', type=str, help='Model Training Stage')
    parser.add_argument('--ar_model_ckpt_dir', type=str, help='The stage1 ckpt dir')
    parser.add_argument('--infer_expt_dir', type=str, help='The experiment dir')
    parser.add_argument('--infer_output_dir', type=str, help='The output dir to save inferred audios')
    parser.add_argument('--infer_mode', type=str, help='The inference mode')
    parser.add_argument('--infer_test_list_file', type=str, help='The inference test list file')
    parser.add_argument('--infer_text', type=str, help='The text to be synthesized from')
    parser.add_argument('--infer_text_prompt', type=str, help='The inference text prompt')
    parser.add_argument('--infer_audio_prompt', type=str, help='The inference audio prompt')
    return parser


def resolve_args(args, root):
    """Validate *args* for the selected stage and fill in defaults in place.

    Args:
        args: Namespace produced by ``build_parser().parse_args()``.
        root: Repository root used to locate the default configuration file.

    Returns:
        The same namespace, with ``config``, ``gpu`` and the stage-specific
        fields normalised.

    Raises:
        SystemExit: With status 1 when a required option is missing.
    """
    if args.stage is None:
        fail("Error: Please specify the running stage")

    if args.config is None:
        # NOTE(review): the default is looked up in the repository root;
        # confirm that is where exp_config.json is expected to live.
        args.config = os.path.join(root, 'exp_config.json')
    print(f"Experimental Configuration File: {args.config}")

    if args.gpu is None:
        args.gpu = '0'

    if args.stage == 2:
        if args.name is None:
            fail("Error: Please specify the experiments name")
        if args.model_train_stage == '2' and args.ar_model_ckpt_dir is None:
            fail("Error: Please specify the checkpoint path to the trained model in stage1.")
        if args.model_train_stage == '1':
            # AR training never consumes a previous checkpoint.
            args.ar_model_ckpt_dir = None
        print(f"Experimental Name: {args.name}")

    elif args.stage == 3:
        if args.infer_expt_dir is None:
            fail("Error: Please specify the experimental directory. The value is like [Your path to save logs and checkpoints]/[YourExptName]")
        if args.infer_output_dir is None:
            args.infer_output_dir = os.path.join(args.infer_expt_dir, 'result')
        if args.infer_mode is None:
            fail("Error: Please specify the inference mode, e.g., \"batch\", \"single\"")
        if args.infer_mode == 'batch' and args.infer_test_list_file is None:
            fail("Error: Please specify the test list file used in inference when the inference mode is batch")
        if args.infer_mode == 'single' and args.infer_text is None:
            fail("Error: Please specify the text to be synthesized when the inference mode is single")

        if args.infer_mode == 'single':
            print(f'Text: {args.infer_text}')
            args.infer_test_list_file = None
        elif args.infer_mode == 'batch':
            args.infer_text = ""
            args.infer_text_prompt = ""
            args.infer_audio_prompt = ""

    return args


def append_option(cmd, flag, value):
    """Append ``flag value`` to *cmd* unless *value* is ``None``.

    ``subprocess.run`` rejects ``None`` entries in an argument list, so unset
    options are left out and the called script falls back to its own default.
    Empty strings are still forwarded.
    """
    if value is not None:
        cmd.extend([flag, value])


def build_command(args, root):
    """Return the argument list for the stage selected in *args*.

    Args:
        args: Namespace already processed by ``resolve_args``.
        root: Repository root containing ``bins/tts``.

    Returns:
        A list of strings suitable for ``subprocess.run``, or ``None`` when
        ``args.stage`` is not 1, 2 or 3.
    """
    # Use the interpreter running this script so the child sees the same
    # environment; a bare "python" may resolve to a different installation.
    python = sys.executable or 'python'
    bins_dir = os.path.join(root, "bins", "tts")

    # Features Extraction
    if args.stage == 1:
        return [python, os.path.join(bins_dir, "preprocess.py"),
                '--config', args.config, '--num_workers', '4']

    # Training
    if args.stage == 2:
        cmd = [python, os.path.join(bins_dir, "train.py"),
               '--config', args.config,
               '--exp_name', args.name,
               '--log_level', 'debug']
        append_option(cmd, '--train_stage', args.model_train_stage)
        # NOTE(review): for AR training the checkpoint is None and the flag
        # is omitted; confirm train.py treats a missing flag as "no checkpoint".
        append_option(cmd, '--checkpoint_path', args.ar_model_ckpt_dir)
        return cmd

    # Inference
    if args.stage == 3:
        cmd = [python, os.path.join(bins_dir, "inference.py"),
               '--config', args.config,
               '--log_level', 'debug',
               '--acoustics_dir', args.infer_expt_dir,
               '--output_dir', args.infer_output_dir,
               '--mode', args.infer_mode]
        append_option(cmd, '--text', args.infer_text)
        append_option(cmd, '--text_prompt', args.infer_text_prompt)
        append_option(cmd, '--audio_prompt', args.infer_audio_prompt)
        append_option(cmd, '--test_list_file', args.infer_test_list_file)
        return cmd

    return None


def build_monotonic_align(root):
    """Build the Monotonic Align extension in place under *root*."""
    build_dir = os.path.join(root, 'modules', 'monotonic_align')
    os.makedirs(os.path.join(build_dir, 'monotonic_align'), exist_ok=True)
    subprocess.run(
        [sys.executable or 'python', 'setup.py', 'build_ext', '--inplace'],
        check=True,
        cwd=build_dir,
    )


def main(argv=None):
    """Run the stage requested on the command line.

    Args:
        argv: Optional argument list; defaults to ``sys.argv[1:]``.
    """
    print(f"Work Directory: {work_dir}")

    os.environ['WORK_DIR'] = work_dir
    os.environ['PYTHONPATH'] = work_dir
    os.environ['PYTHONIOENCODING'] = 'UTF-8'

    # Parse and validate first so usage errors do not pay for a build.
    args = resolve_args(build_parser().parse_args(argv), work_dir)

    build_monotonic_align(work_dir)
    # Relative paths given on the command line are resolved from the
    # repository root by the child scripts.
    os.chdir(work_dir)

    cmd = build_command(args, work_dir)
    if cmd is None:
        return

    # Applied to every stage, inference included, so --gpu is always honoured.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    subprocess.run(cmd, check=True, cwd=work_dir)


if __name__ == "__main__":
    main()
158 changes: 0 additions & 158 deletions egs/tts/VALLE/run.sh

This file was deleted.

2 changes: 1 addition & 1 deletion evaluation/metrics/similarity/resemblyzer_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def extract_resemblyzer_similarity(target_path, reference_path, dump_dir):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

filename = target_path.split("/")[-1]
filename = os.path.basename(target_path)
csv_file_name = f"similarity_results_{filename}.csv"
dump_dir = dump_dir + "/" + csv_file_name

Expand Down
4 changes: 2 additions & 2 deletions models/base/base_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def get_vocoder_info(self):
os.path.dirname(self.checkpoint_dir_vocoder), "args.json"
)
self.cfg.vocoder = load_config(self.vocoder_cfg, lowercase=True)
self.vocoder_tag = self.checkpoint_dir_vocoder.split("/")[-2].split(":")[-1]
self.vocoder_steps = self.checkpoint_dir_vocoder.split("/")[-1].split(".")[0]
self.vocoder_tag = os.path.split(self.checkpoint_dir_vocoder)[-2].split(":")[-1]
self.vocoder_steps = os.path.splitext(os.path.basename(self.checkpoint_dir_vocoder))[0]

def build_test_utt_data(self):
raise NotImplementedError
Expand Down
Loading
Loading