Skip to content

Commit

Permalink
Extract some preprocessing from private repo.
Browse files Browse the repository at this point in the history
  • Loading branch information
mitsuse committed Sep 13, 2024
1 parent b78639f commit 1df1e68
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions src/torch_wae/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations

from random import Random

import torch


def omit_silence(
    waveform: torch.Tensor,
    threshold: float,
    channel_first: bool = False,
) -> torch.Tensor:
    """Trim silence from both ends of ``waveform``.

    An element is treated as non-silent when its absolute value is strictly
    greater than ``threshold``. A position along the trimmed axis is kept
    from the first to the last position at which any element across the
    other axis is non-silent; silence in between is left untouched.

    Args:
        waveform: Tensor to trim. It is indexed on two axes, so a 2-D tensor
            is expected.
        threshold: Amplitude above which an element counts as non-silent.
        channel_first: Selects the trimmed axis. When ``False`` (default) the
            reduction runs over axis 0 and axis 1 is trimmed; when ``True``
            the reduction runs over axis 1 and axis 0 is trimmed.

    Returns:
        The trimmed slice of ``waveform``, or a zero tensor with the same
        shape, dtype and device as ``waveform`` when nothing exceeds
        ``threshold``.
    """
    # NOTE(review): the flag name reads inverted relative to the axes used
    # here -- the default (``False``) is the branch that suits a
    # (channels, time) tensor. Behaviour is kept as-is; confirm with callers.
    reduced_axis = 1 if channel_first else 0

    is_loud = waveform.abs() > threshold
    loud_positions = torch.where(is_loud.any(dim=reduced_axis))[0]

    # Entirely silent input: hand back zeros of the original shape rather
    # than an empty tensor.
    if loud_positions.numel() == 0:
        return torch.zeros_like(waveform)

    first = loud_positions[0]
    stop = loud_positions[-1] + 1  # slice end is exclusive

    if channel_first:
        return waveform[first:stop, :]
    return waveform[:, first:stop]


def crop_randomly(
    x: torch.Tensor,
    random: Random,
    sample_rate: int,
    durations: int,
) -> torch.Tensor:
    """Crop or zero-pad ``x`` to ``sample_rate * durations`` columns.

    When ``x`` has at least the target number of columns, a window of exactly
    that width is taken at a start offset drawn from ``random``. Otherwise
    ``x`` is right-padded with zeros up to the target width and ``random`` is
    not consulted.

    Args:
        x: 2-D tensor; the last axis is the one cropped or padded.
        random: Source of randomness for the crop offset.
        sample_rate: Multiplied by ``durations`` to get the target width
            (presumably samples per second -- confirm with callers).
        durations: Multiplied by ``sample_rate`` to get the target width
            (presumably a length in seconds -- confirm with callers).

    Returns:
        A tensor with the same number of rows as ``x`` and
        ``sample_rate * durations`` columns.

    Raises:
        ValueError: If ``x`` is not 2-D (raised by the shape unpacking).
    """
    c, d = x.shape
    size = sample_rate * durations
    pad = max(0, size - d)

    if pad == 0:
        # randint is inclusive at both ends, so the last full-width window
        # (starting at d - size) is reachable.
        start = random.randint(0, d - size)
        return x[:, start : start + size]

    # Allocate the padding directly on x's device instead of creating it on
    # the CPU and copying it over afterwards.
    padding = torch.zeros((c, pad), dtype=x.dtype, device=x.device)
    return torch.cat((x, padding), dim=-1)

0 comments on commit 1df1e68

Please sign in to comment.