New Tiny

ylzz1997 · ylzz1997 · commit 57c079fbfab2 · 2023-06-30T00:38:09.000+08:00
diff --git a/configs_template/config_template.json b/configs_template/config_template.json
@@ -54,6 +54,7 @@
     "upsample_initial_channel": 512,
     "upsample_kernel_sizes": [16,16, 4, 4, 4],
     "n_layers_q": 3,
+    "n_flow_layer": 4,
     "use_spectral_norm": false,
     "gin_channels": 768,
     "ssl_dim": 768,
@@ -63,7 +64,6 @@
     "speaker_embedding":false,
     "vol_embedding":false,
     "use_depthwise_conv":false,
-    "use_depthwise_transposeconv":false,
     "use_automatic_f0_prediction": true
   },
   "spk": {
diff --git a/models.py b/models.py
@@ -322,8 +322,8 @@ def __init__(self,
                  vol_embedding=False,
                  vocoder_name = "nsf-hifigan",
                  use_depthwise_conv = False,
-                 use_depthwise_transposeconv = False,
                  use_automatic_f0_prediction = True,
+                 n_flow_layer = 4,
                  **kwargs):
 
         super().__init__()
@@ -372,8 +372,7 @@ def __init__(self,
             "upsample_initial_channel": upsample_initial_channel,
             "upsample_kernel_sizes": upsample_kernel_sizes,
             "gin_channels": gin_channels,
-            "use_depthwise_conv":use_depthwise_conv,
-            "use_depthwise_transposeconv":use_depthwise_transposeconv
+            "use_depthwise_conv":use_depthwise_conv
         }
         
         modules.set_Conv1dModel(self.use_depthwise_conv)
@@ -390,7 +389,7 @@ def __init__(self,
             self.dec = Generator(h=hps)
 
         self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels)
-        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels)
+        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, n_flow_layer, gin_channels=gin_channels)
         if self.use_automatic_f0_prediction:
             self.f0_decoder = F0Decoder(
                 1,
diff --git a/modules/modules.py b/modules/modules.py
@@ -66,47 +66,6 @@ def forward(self, x, x_mask):
     return x * x_mask
 
 
-class DDSConv(nn.Module):
-  """
-  Dialted and Depth-Separable Convolution
-  """
-  def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
-    super().__init__()
-    self.channels = channels
-    self.kernel_size = kernel_size
-    self.n_layers = n_layers
-    self.p_dropout = p_dropout
-
-    self.drop = nn.Dropout(p_dropout)
-    self.convs_sep = nn.ModuleList()
-    self.convs_1x1 = nn.ModuleList()
-    self.norms_1 = nn.ModuleList()
-    self.norms_2 = nn.ModuleList()
-    for i in range(n_layers):
-      dilation = kernel_size ** i
-      padding = (kernel_size * dilation - dilation) // 2
-      self.convs_sep.append(nn.Conv1d(channels, channels, kernel_size, 
-          groups=channels, dilation=dilation, padding=padding
-      ))
-      self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
-      self.norms_1.append(LayerNorm(channels))
-      self.norms_2.append(LayerNorm(channels))
-
-  def forward(self, x, x_mask, g=None):
-    if g is not None:
-      x = x + g
-    for i in range(self.n_layers):
-      y = self.convs_sep[i](x * x_mask)
-      y = self.norms_1[i](y)
-      y = F.gelu(y)
-      y = self.convs_1x1[i](y)
-      y = self.norms_2[i](y)
-      y = F.gelu(y)
-      y = self.drop(y)
-      x = x + y
-    return x * x_mask
-
-
 class WN(torch.nn.Module):
   def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0):
     super(WN, self).__init__()
diff --git a/vdecoder/hifigan/models.py b/vdecoder/hifigan/models.py
@@ -1,28 +1,18 @@
-import os
 import json
-from .env import AttrDict
+import os
+
 import numpy as np
 import torch
-import torch.nn.functional as F
 import torch.nn as nn
-from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm,spectral_norm
-from .utils import init_weights, get_padding
+import torch.nn.functional as F
+from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
+from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
 
-from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D, Depthwise_Separable_TransposeConv1D
+from .env import AttrDict
+from .utils import get_padding, init_weights
 
 LRELU_SLOPE = 0.1
 
-Conv1dModel = nn.Conv1d
-ConvTranspose1dModel = nn.ConvTranspose1d
-
-def set_Conv1dModel(use_depthwise_conv):
-    global Conv1dModel
-    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
-
-def set_ConvTranspose1dModel(use_depthwise_transposeconv):
-    global ConvTranspose1dModel
-    ConvTranspose1dModel = Depthwise_Separable_TransposeConv1D if use_depthwise_transposeconv else nn.ConvTranspose1d
 
 def load_model(model_path, device='cuda'):
     config_file = os.path.join(os.path.split(model_path)[0], 'config.json')
@@ -48,21 +38,21 @@ def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
@@ -78,19 +68,19 @@ def forward(self, x):
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.convs2:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
 
 
 class ResBlock2(torch.nn.Module):
     def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
@@ -104,7 +94,7 @@ def forward(self, x):
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
 
 
 def padDiff(x):
@@ -211,8 +201,6 @@ def forward(self, f0):
         output uv: tensor(batchsize=1, length, 1)
         """
         with torch.no_grad():
-            f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
-                                 device=f0.device)
             # fundamental component
             fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
 
@@ -289,28 +277,25 @@ class Generator(torch.nn.Module):
     def __init__(self, h):
         super(Generator, self).__init__()
         self.h = h
-        
-        set_Conv1dModel(h["use_depthwise_conv"])
-        set_ConvTranspose1dModel(h["use_depthwise_transposeconv"])
-        
+
         self.num_kernels = len(h["resblock_kernel_sizes"])
         self.num_upsamples = len(h["upsample_rates"])
         self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"]))
         self.m_source = SourceModuleHnNSF(
             sampling_rate=h["sampling_rate"],
             harmonic_num=8)
         self.noise_convs = nn.ModuleList()
-        self.conv_pre = weight_norm_modules(Conv1dModel(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
+        self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
         resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.ups.append(weight_norm_modules(
-                ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u + 1 ) // 2)))
+            self.ups.append(weight_norm(
+                ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
+                                k, u, padding=(k - u +1 ) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
-                self.noise_convs.append(Conv1dModel(
+                self.noise_convs.append(Conv1d(
                     1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
@@ -320,7 +305,7 @@ def __init__(self, h):
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
                 self.resblocks.append(resblock(h, ch, k, d))
 
-        self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
+        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
         self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1)
@@ -357,18 +342,18 @@ def forward(self, x, f0, g=None):
     def remove_weight_norm(self):
         print('Removing weight norm...')
         for l in self.ups:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.resblocks:
             l.remove_weight_norm()
-        remove_weight_norm_modules(self.conv_pre)
-        remove_weight_norm_modules(self.conv_post)
+        remove_weight_norm(self.conv_pre)
+        remove_weight_norm(self.conv_post)
 
 
 class DiscriminatorP(torch.nn.Module):
     def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
         super(DiscriminatorP, self).__init__()
         self.period = period
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
             norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -427,7 +412,7 @@ def forward(self, y, y_hat):
 class DiscriminatorS(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv1d(1, 128, 15, 1, padding=7)),
             norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),
diff --git a/vdecoder/hifigan/utils.py b/vdecoder/hifigan/utils.py
@@ -1,10 +1,10 @@
 import glob
 import os
-import matplotlib
-import torch
-from torch.nn.utils import weight_norm
+
 # matplotlib.use("Agg")
 import matplotlib.pylab as plt
+import torch
+from torch.nn.utils import weight_norm
 
 
 def plot_spectrogram(spectrogram):
@@ -21,10 +21,7 @@ def plot_spectrogram(spectrogram):
 
 def init_weights(m, mean=0.0, std=0.01):
     classname = m.__class__.__name__
-    if "Depthwise_Separable" in classname:
-      m.depth_conv.weight.data.normal_(mean, std)
-      m.point_conv.weight.data.normal_(mean, std)
-    elif classname.find("Conv") != -1:
+    if classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)
 
 
diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py
diff --git a/vdecoder/hifiganwithsnake/utils.py b/vdecoder/hifiganwithsnake/utils.py