
Commit 90c9ccc

Fix two bugs: the NsfHifigan upsampling error under DML (DirectML) and the SourceModuleHnNSF error
1 parent 72deb15 commit 90c9ccc
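
The NsfHifigan/SourceModuleHnNSF changes themselves live in files that are not expanded in this view. For context only, here is a minimal sketch of the kind of DirectML-friendly pattern such fixes usually rely on: replacing an interpolation-based F0 upsample with an explicit repeat. The function name upsample_f0_nearest and the hop_length argument are hypothetical illustrations, not code from this commit.

import torch

def upsample_f0_nearest(f0: torch.Tensor, hop_length: int) -> torch.Tensor:
    # Hypothetical helper, not part of this commit.
    # f0: [batch, frames] at frame rate -> [batch, frames * hop_length] at sample rate.
    # repeat_interleave is equivalent to nearest-neighbour upsampling but avoids
    # interpolate modes that some DirectML/ONNX backends do not support.
    return torch.repeat_interleave(f0, hop_length, dim=1)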

5 files changed: +276 −220 lines changed

onnx_export.py

+133 −51
@@ -1,56 +1,138 @@
+import json
 import torch
-
 import utils
-from onnxexport.model_onnx import SynthesizerTrn
-
-
-def main(NetExport):
-    path = "SoVits4.0"
-    if NetExport:
-        device = torch.device("cpu")
-        hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
-        SVCVITS = SynthesizerTrn(
-            hps.data.filter_length // 2 + 1,
-            hps.train.segment_size // hps.data.hop_length,
-            **hps.model)
-        _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None)
-        _ = SVCVITS.eval().to(device)
-        for i in SVCVITS.parameters():
-            i.requires_grad = False
-
-        n_frame = 10
-        test_hidden_unit = torch.rand(1, n_frame, 256)
-        test_pitch = torch.rand(1, n_frame)
-        test_mel2ph = torch.arange(0, n_frame, dtype=torch.int64)[None]  # torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0)
-        test_uv = torch.ones(1, n_frame, dtype=torch.float32)
-        test_noise = torch.randn(1, 192, n_frame)
-        test_sid = torch.LongTensor([0])
-        input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
-        output_names = ["audio", ]
-
-        torch.onnx.export(SVCVITS,
-                          (
-                              test_hidden_unit.to(device),
-                              test_pitch.to(device),
-                              test_mel2ph.to(device),
-                              test_uv.to(device),
-                              test_noise.to(device),
-                              test_sid.to(device)
-                          ),
-                          f"checkpoints/{path}/model.onnx",
-                          dynamic_axes={
-                              "c": [0, 1],
-                              "f0": [1],
-                              "mel2ph": [1],
-                              "uv": [1],
-                              "noise": [2],
-                          },
-                          do_constant_folding=False,
-                          opset_version=16,
-                          verbose=False,
-                          input_names=input_names,
-                          output_names=output_names)
+from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
+
+
+def main():
+    path = "crs"
+
+    device = torch.device("cpu")
+    hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
+    SVCVITS = SynthesizerTrn(
+        hps.data.filter_length // 2 + 1,
+        hps.train.segment_size // hps.data.hop_length,
+        **hps.model)
+    _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None)
+    _ = SVCVITS.eval().to(device)
+    for i in SVCVITS.parameters():
+        i.requires_grad = False
+
+    num_frames = 200
+
+    test_hidden_unit = torch.rand(1, num_frames, SVCVITS.gin_channels)
+    test_pitch = torch.rand(1, num_frames)
+    test_vol = torch.rand(1, num_frames)
+    test_mel2ph = torch.LongTensor(torch.arange(0, num_frames)).unsqueeze(0)
+    test_uv = torch.ones(1, num_frames, dtype=torch.float32)
+    test_noise = torch.randn(1, 192, num_frames)
+    test_sid = torch.LongTensor([0])
+    export_mix = True
+    if len(hps.spk) < 2:
+        export_mix = False
+
+    if export_mix:
+        spk_mix = []
+        n_spk = len(hps.spk)
+        for i in range(n_spk):
+            spk_mix.append(1.0 / float(n_spk))
+        test_sid = torch.tensor(spk_mix)
+        SVCVITS.export_chara_mix(hps.spk)
+        test_sid = test_sid.unsqueeze(0)
+        test_sid = test_sid.repeat(num_frames, 1)
+
+    SVCVITS.eval()
+
+    if export_mix:
+        daxes = {
+            "c": [0, 1],
+            "f0": [1],
+            "mel2ph": [1],
+            "uv": [1],
+            "noise": [2],
+            "sid": [0]
+        }
+    else:
+        daxes = {
+            "c": [0, 1],
+            "f0": [1],
+            "mel2ph": [1],
+            "uv": [1],
+            "noise": [2]
+        }
+
+    input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
+    output_names = ["audio", ]
+
+    if SVCVITS.vol_embedding:
+        input_names.append("vol")
+        vol_dadict = {"vol": [1]}
+        daxes.update(vol_dadict)
+        test_inputs = (
+            test_hidden_unit.to(device),
+            test_pitch.to(device),
+            test_mel2ph.to(device),
+            test_uv.to(device),
+            test_noise.to(device),
+            test_sid.to(device),
+            test_vol.to(device)
+        )
+    else:
+        test_inputs = (
+            test_hidden_unit.to(device),
+            test_pitch.to(device),
+            test_mel2ph.to(device),
+            test_uv.to(device),
+            test_noise.to(device),
+            test_sid.to(device)
+        )
+
+    # SVCVITS = torch.jit.script(SVCVITS)
+    SVCVITS(test_hidden_unit.to(device),
+            test_pitch.to(device),
+            test_mel2ph.to(device),
+            test_uv.to(device),
+            test_noise.to(device),
+            test_sid.to(device),
+            test_vol.to(device))
+
+    SVCVITS.dec.OnnxExport()
+
+    torch.onnx.export(
+        SVCVITS,
+        test_inputs,
+        f"checkpoints/{path}/{path}_SoVits.onnx",
+        dynamic_axes=daxes,
+        do_constant_folding=False,
+        opset_version=16,
+        verbose=False,
+        input_names=input_names,
+        output_names=output_names
+    )
+
+    vec_lay = "layer-12" if SVCVITS.gin_channels == 768 else "layer-9"
+    spklist = []
+    for key in hps.spk.keys():
+        spklist.append(key)
+
+    MoeVSConf = {
+        "Folder": f"{path}",
+        "Name": f"{path}",
+        "Type": "SoVits",
+        "Rate": hps.data.sampling_rate,
+        "Hop": hps.data.hop_length,
+        "Hubert": f"vec-{SVCVITS.gin_channels}-{vec_lay}",
+        "SoVits4": True,
+        "SoVits3": False,
+        "CharaMix": export_mix,
+        "Volume": SVCVITS.vol_embedding,
+        "HiddenSize": SVCVITS.gin_channels,
+        "Characters": spklist
+    }
+
+    with open(f"checkpoints/{path}.json", 'w') as MoeVsConfFile:
+        json.dump(MoeVSConf, MoeVsConfFile, indent=4)


 if __name__ == '__main__':
-    main(True)
+    main()
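
For reference, a minimal sketch of driving the exported model with onnxruntime, mirroring the input names and shapes used by the exporter above. It assumes a single-speaker checkpoint (no CharaMix), no volume embedding, 256-dimensional vec features, and the checkpoint folder "crs" from the script; the inputs are random placeholders just to exercise the graph, and the DML provider requires the onnxruntime-directml build.

import numpy as np
import onnxruntime

n_frames = 200
session = onnxruntime.InferenceSession(
    "checkpoints/crs/crs_SoVits.onnx",
    providers=["DmlExecutionProvider", "CPUExecutionProvider"],  # DML needs onnxruntime-directml
)
feeds = {
    "c": np.random.rand(1, n_frames, 256).astype(np.float32),      # vec/HuBERT features
    "f0": np.random.rand(1, n_frames).astype(np.float32) * 400.0,  # F0 in Hz
    "mel2ph": np.arange(n_frames, dtype=np.int64)[None],           # frame alignment
    "uv": np.ones((1, n_frames), dtype=np.float32),                # voiced/unvoiced mask
    "noise": np.random.randn(1, 192, n_frames).astype(np.float32),
    "sid": np.array([0], dtype=np.int64),                          # speaker id
}
audio = session.run(["audio"], feeds)[0]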

onnx_export_old.py

+56
@@ -0,0 +1,56 @@
+import torch
+
+import utils
+from onnxexport.model_onnx import SynthesizerTrn
+
+
+def main(NetExport):
+    path = "SoVits4.0"
+    if NetExport:
+        device = torch.device("cpu")
+        hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
+        SVCVITS = SynthesizerTrn(
+            hps.data.filter_length // 2 + 1,
+            hps.train.segment_size // hps.data.hop_length,
+            **hps.model)
+        _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None)
+        _ = SVCVITS.eval().to(device)
+        for i in SVCVITS.parameters():
+            i.requires_grad = False
+
+        n_frame = 10
+        test_hidden_unit = torch.rand(1, n_frame, 256)
+        test_pitch = torch.rand(1, n_frame)
+        test_mel2ph = torch.arange(0, n_frame, dtype=torch.int64)[None]  # torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0)
+        test_uv = torch.ones(1, n_frame, dtype=torch.float32)
+        test_noise = torch.randn(1, 192, n_frame)
+        test_sid = torch.LongTensor([0])
+        input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
+        output_names = ["audio", ]
+
+        torch.onnx.export(SVCVITS,
+                          (
+                              test_hidden_unit.to(device),
+                              test_pitch.to(device),
+                              test_mel2ph.to(device),
+                              test_uv.to(device),
+                              test_noise.to(device),
+                              test_sid.to(device)
+                          ),
+                          f"checkpoints/{path}/model.onnx",
+                          dynamic_axes={
+                              "c": [0, 1],
+                              "f0": [1],
+                              "mel2ph": [1],
+                              "uv": [1],
+                              "noise": [2],
+                          },
+                          do_constant_folding=False,
+                          opset_version=16,
+                          verbose=False,
+                          input_names=input_names,
+                          output_names=output_names)
+
+
+if __name__ == '__main__':
+    main(True)

onnx_export_speaker_mix.py

-138
This file was deleted.
