Skip to content

Commit dd71473

Browse files
committed
Clean up generate aligned features
fixes a bug for r > 1
1 parent 5aea7cd commit dd71473

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

generate_aligned_predictions.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,13 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
4949
model.make_generation_fast_()
5050

5151
mel_org = np.load(join(in_dir, mel_filename))
52-
mel = Variable(torch.from_numpy(mel_org)).unsqueeze(0).contiguous()
52+
# zero pad
53+
b_pad = r # imitates initial state
54+
e_pad = r - len(mel_org) % r if len(mel_org) % r > 0 else 0
55+
mel = np.pad(mel_org, [(b_pad, e_pad), (0, 0)],
56+
mode="constant", constant_values=0)
57+
58+
mel = Variable(torch.from_numpy(mel)).unsqueeze(0).contiguous()
5359

5460
# Downsample mel spectrogram
5561
if downsample_step > 1:
@@ -78,10 +84,10 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
7884
frame_positions=frame_positions, speaker_ids=speaker_ids)
7985

8086
mel_output = mel_outputs[0].data.cpu().numpy()
81-
8287
# **Time resolution adjustment**
83-
# remove beginning audio used for first mel prediction
84-
wav = np.load(join(in_dir, audio_filename))[hparams.hop_size * downsample_step:]
88+
mel_output = mel_output[:-(b_pad + e_pad)]
89+
90+
wav = np.load(join(in_dir, audio_filename))
8591
assert len(wav) % hparams.hop_size == 0
8692

8793
# Coarse upsample just for convenience
@@ -92,18 +98,13 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
9298
# the original mel length
9399
assert mel_output.shape[0] >= mel_org.shape[0]
94100

95-
# Trim mel output
96-
expected_frames = len(wav) // hparams.hop_size
97-
mel_output = mel_output[:expected_frames]
98-
99101
# Make sure we have correct lengths
100102
assert mel_output.shape[0] * hparams.hop_size == len(wav)
101103

102104
timesteps = len(wav)
103105

104106
# save
105-
np.save(join(out_dir, audio_filename), wav.astype(np.int16),
106-
allow_pickle=False)
107+
np.save(join(out_dir, audio_filename), wav, allow_pickle=False)
107108
np.save(join(out_dir, mel_filename), mel_output.astype(np.float32),
108109
allow_pickle=False)
109110

0 commit comments

Comments
 (0)