Skip to content

Commit 63f8895

Browse files
committed
New Tiny Model Update
1 parent a8acfa0 commit 63f8895

File tree

5 files changed

+23
-12
lines changed

5 files changed

+23
-12
lines changed

compress_model.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def copyStateDict(state_dict):
1818
return new_state_dict
1919

2020

21-
def removeOptimizer(config: str, input_model: str, output_model: str):
21+
def removeOptimizer(config: str, input_model: str, ishalf: bool, output_model: str):
2222
hps = utils.get_hparams_from_file(config)
2323

2424
net_g = SynthesizerTrn(hps.data.filter_length // 2 + 1,
@@ -35,8 +35,8 @@ def removeOptimizer(config: str, input_model: str, output_model: str):
3535
keys = []
3636
for k, v in new_dict_g['model'].items():
3737
keys.append(k)
38-
39-
new_dict_g = {k: new_dict_g['model'][k] for k in keys}
38+
39+
new_dict_g = {k: new_dict_g['model'][k].half() for k in keys} if ishalf else {k: new_dict_g['model'][k] for k in keys}
4040

4141
torch.save(
4242
{
@@ -56,14 +56,16 @@ def removeOptimizer(config: str, input_model: str, output_model: str):
5656
default='configs/config.json')
5757
parser.add_argument("-i", "--input", type=str)
5858
parser.add_argument("-o", "--output", type=str, default=None)
59-
59+
parser.add_argument('-hf', '--half', action='store_true', default=False, help='Save as FP16')
60+
6061
args = parser.parse_args()
6162

6263
output = args.output
6364

6465
if output is None:
6566
import os.path
6667
filename, ext = os.path.splitext(args.input)
67-
output = filename + "_release" + ext
68+
half = "_half" if args.half else ""
69+
output = filename + "_release" + half + ext
6870

69-
removeOptimizer(args.config, args.input, output)
71+
removeOptimizer(args.config, args.input, args.half, output)

configs_template/config_template.json

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
"speaker_embedding":false,
6565
"vol_embedding":false,
6666
"use_depthwise_conv":false,
67+
"flow_share_parameter": false,
6768
"use_automatic_f0_prediction": true
6869
},
6970
"spk": {

models.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ def __init__(self,
2020
dilation_rate,
2121
n_layers,
2222
n_flows=4,
23-
gin_channels=0):
23+
gin_channels=0,
24+
share_parameter=False
25+
):
2426
super().__init__()
2527
self.channels = channels
2628
self.hidden_channels = hidden_channels
@@ -31,10 +33,13 @@ def __init__(self,
3133
self.gin_channels = gin_channels
3234

3335
self.flows = nn.ModuleList()
36+
37+
self.wn = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=0, gin_channels=gin_channels) if share_parameter else None
38+
3439
for i in range(n_flows):
3540
self.flows.append(
3641
modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers,
37-
gin_channels=gin_channels, mean_only=True))
42+
gin_channels=gin_channels, mean_only=True, wn_sharing_parameter=self.wn))
3843
self.flows.append(modules.Flip())
3944

4045
def forward(self, x, x_mask, g=None, reverse=False):
@@ -320,6 +325,7 @@ def __init__(self,
320325
vocoder_name = "nsf-hifigan",
321326
use_depthwise_conv = False,
322327
use_automatic_f0_prediction = True,
328+
flow_share_parameter = False,
323329
n_flow_layer = 4,
324330
**kwargs):
325331

@@ -386,7 +392,7 @@ def __init__(self,
386392
self.dec = Generator(h=hps)
387393

388394
self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels)
389-
self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, n_flow_layer, gin_channels=gin_channels)
395+
self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, n_flow_layer, gin_channels=gin_channels, share_parameter= flow_share_parameter)
390396
if self.use_automatic_f0_prediction:
391397
self.f0_decoder = F0Decoder(
392398
1,

modules/modules.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,9 @@ def __init__(self,
263263
n_layers,
264264
p_dropout=0,
265265
gin_channels=0,
266-
mean_only=False):
266+
mean_only=False,
267+
wn_sharing_parameter=None
268+
):
267269
assert channels % 2 == 0, "channels should be divisible by 2"
268270
super().__init__()
269271
self.channels = channels
@@ -275,7 +277,7 @@ def __init__(self,
275277
self.mean_only = mean_only
276278

277279
self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
278-
self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, gin_channels=gin_channels)
280+
self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, gin_channels=gin_channels) if wn_sharing_parameter is None else wn_sharing_parameter
279281
self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
280282
self.post.weight.data.zero_()
281283
self.post.bias.data.zero_()

preprocess_flist_config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def get_wav_duration(file_path):
8585
config_template["model"]["ssl_dim"] = config_template["model"]["filter_channels"] = config_template["model"]["gin_channels"] = 768
8686
d_config_template["data"]["encoder_out_channels"] = 768
8787
elif args.speech_encoder == "vec256l9" or args.speech_encoder == 'hubertsoft':
88-
config_template["model"]["ssl_dim"] = config_template["model"]["filter_channels"] = config_template["model"]["gin_channels"] = 256
88+
config_template["model"]["ssl_dim"] = config_template["model"]["gin_channels"] = 256
8989
d_config_template["data"]["encoder_out_channels"] = 256
9090
elif args.speech_encoder == "whisper-ppg" or args.speech_encoder == 'cnhubertlarge':
9191
config_template["model"]["ssl_dim"] = config_template["model"]["filter_channels"] = config_template["model"]["gin_channels"] = 1024

0 commit comments

Comments
 (0)