Skip to content

Commit a8acfa0

Browse files
authored
Merge branch '4.1-Stable' into 4.1-Latest
2 parents 57c079f + 7937294 commit a8acfa0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+420
-407
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ filelists/test.txt
156156
filelists/train.txt
157157
filelists/val.txt
158158
.idea/
159+
.vscode/
159160
.idea/modules.xml
160161
.idea/so-vits-svc.iml
161162
.idea/vcs.xml
@@ -168,3 +169,4 @@ pretrain/vec-256-layer-9.onnx
168169
pretrain/vec-256-layer-12.onnx
169170
pretrain/vec-768-layer-9.onnx
170171
.vscode/launch.json
172+
.ruff.toml

README.md

+24-24
Large diffs are not rendered by default.

README_zh_CN.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n "
340340
+ `-lg` | `--linear_gradient`:两段音频切片的交叉淡入长度,如果强制切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,单位为秒
341341
+ `-f0p` | `--f0_predictor`:选择F0预测器,可选择crepe,pm,dio,harvest,默认为pm(注意:crepe为原F0使用均值滤波器)
342342
+ `-a` | `--auto_predict_f0`:语音转换自动预测音高,转换歌声时不要打开这个会严重跑调
343-
+ `-cm` | `--cluster_model_path`:聚类模型或特征检索索引路径,如果没有训练聚类或特征检索则随便填
343+
+ `-cm` | `--cluster_model_path`:聚类模型或特征检索索引路径,留空则自动设为各方案模型的默认路径,如果没有训练聚类或特征检索则随便填
344344
+ `-cr` | `--cluster_infer_ratio`:聚类方案或特征检索占比,范围0-1,若没有训练聚类模型或特征检索则默认0即可
345345
+ `-eh` | `--enhance`:是否使用NSF_HIFIGAN增强器,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭
346346
+ `-shd` | `--shallow_diffusion`:是否使用浅层扩散,使用后可解决一部分电音问题,默认关闭,该选项打开时,NSF_HIFIGAN增强器将会被禁止
@@ -379,7 +379,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n "
379379
+ 执行`python cluster/train_cluster.py`,模型的输出会在`logs/44k/kmeans_10000.pt`
380380
+ 聚类模型目前可以使用gpu进行训练,执行`python cluster/train_cluster.py --gpu`
381381
+ 推理过程:
382-
+ `inference_main.py`中指定`cluster_model_path`
382+
+ `inference_main.py`中指定`cluster_model_path` 为模型输出文件, 留空则默认为`logs/44k/kmeans_10000.pt`
383383
+ `inference_main.py`中指定`cluster_infer_ratio``0`为完全不使用聚类,`1`为只使用聚类,通常设置`0.5`即可
384384

385385
### 特征检索
@@ -396,8 +396,8 @@ python train_index.py -c configs/config.json
396396
模型的输出会在`logs/44k/feature_and_index.pkl`
397397

398398
+ 推理过程:
399-
+ 需要首先制定`--feature_retrieval`,此时聚类方案会自动切换到特征检索方案
400-
+ `inference_main.py`中指定`cluster_model_path` 为模型输出文件
399+
+ 需要首先指定`--feature_retrieval`,此时聚类方案会自动切换到特征检索方案
400+
+ `inference_main.py`中指定`cluster_model_path` 为模型输出文件, 留空则默认为`logs/44k/feature_and_index.pkl`
401401
+ `inference_main.py`中指定`cluster_infer_ratio``0`为完全不使用特征检索,`1`为只使用特征检索,通常设置`0.5`即可
402402

403403
### [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/svc-develop-team/so-vits-svc/blob/4.1-Stable/sovits4_for_colab.ipynb) [sovits4_for_colab.ipynb](https://colab.research.google.com/github/svc-develop-team/so-vits-svc/blob/4.1-Stable/sovits4_for_colab.ipynb)

cluster/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import numpy as np
21
import torch
32
from sklearn.cluster import KMeans
43

4+
55
def get_cluster_model(ckpt_path):
66
checkpoint = torch.load(ckpt_path)
77
kmeans_dict = {}

cluster/kmeans.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1-
import math,pdb
2-
import torch,pynvml
3-
from torch.nn.functional import normalize
41
from time import time
2+
53
import numpy as np
4+
import pynvml
5+
import torch
6+
from torch.nn.functional import normalize
7+
8+
69
# device=torch.device("cuda:0")
710
def _kpp(data: torch.Tensor, k: int, sample_size: int = -1):
811
""" Picks k points in the data based on the kmeans++ method.

cluster/train_cluster.py

+13-15
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
1-
import time,pdb
2-
import tqdm
3-
from time import time as ttime
1+
import argparse
2+
import logging
43
import os
4+
import time
55
from pathlib import Path
6-
import logging
7-
import argparse
8-
from kmeans import KMeansGPU
9-
import torch
6+
107
import numpy as np
11-
from sklearn.cluster import KMeans,MiniBatchKMeans
8+
import torch
9+
import tqdm
10+
from kmeans import KMeansGPU
11+
from sklearn.cluster import KMeans, MiniBatchKMeans
1212

1313
logging.basicConfig(level=logging.INFO)
1414
logger = logging.getLogger(__name__)
15-
from time import time as ttime
16-
import pynvml,torch
1715

1816
def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑
1917
logger.info(f"Loading features from {in_dir}")
@@ -29,22 +27,22 @@ def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=
2927
features = features.astype(np.float32)
3028
logger.info(f"Clustering features of shape: {features.shape}")
3129
t = time.time()
32-
if(use_gpu==False):
30+
if(use_gpu is False):
3331
if use_minibatch:
3432
kmeans = MiniBatchKMeans(n_clusters=n_clusters,verbose=verbose, batch_size=4096, max_iter=80).fit(features)
3533
else:
3634
kmeans = KMeans(n_clusters=n_clusters,verbose=verbose).fit(features)
3735
else:
3836
kmeans = KMeansGPU(n_clusters=n_clusters, mode='euclidean', verbose=2 if verbose else 0,max_iter=500,tol=1e-2)#
3937
features=torch.from_numpy(features)#.to(device)
40-
labels = kmeans.fit_predict(features)#
38+
kmeans.fit_predict(features)#
4139

4240
print(time.time()-t, "s")
4341

4442
x = {
45-
"n_features_in_": kmeans.n_features_in_ if use_gpu==False else features.shape[1],
46-
"_n_threads": kmeans._n_threads if use_gpu==False else 4,
47-
"cluster_centers_": kmeans.cluster_centers_ if use_gpu==False else kmeans.centroids.cpu().numpy(),
43+
"n_features_in_": kmeans.n_features_in_ if use_gpu is False else features.shape[1],
44+
"_n_threads": kmeans._n_threads if use_gpu is False else 4,
45+
"cluster_centers_": kmeans.cluster_centers_ if use_gpu is False else kmeans.centroids.cpu().numpy(),
4846
}
4947
print("end")
5048

data_utils.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
import time
21
import os
32
import random
3+
44
import numpy as np
55
import torch
66
import torch.utils.data
77

8-
import modules.commons as commons
98
import utils
10-
from modules.mel_processing import spectrogram_torch, spec_to_mel_torch, spectrogram_torch
11-
from utils import load_wav_to_torch, load_filepaths_and_text
9+
from modules.mel_processing import spectrogram_torch
10+
from utils import load_filepaths_and_text, load_wav_to_torch
1211

1312
# import h5py
1413

@@ -87,7 +86,7 @@ def get_audio(self, filename):
8786
assert abs(audio_norm.shape[1]-lmin * self.hop_length) < 3 * self.hop_length
8887
spec, c, f0, uv = spec[:, :lmin], c[:, :lmin], f0[:lmin], uv[:lmin]
8988
audio_norm = audio_norm[:, :lmin * self.hop_length]
90-
if volume!= None:
89+
if volume is not None:
9190
volume = volume[:lmin]
9291
return c, f0, spec, audio_norm, spk, uv, volume
9392

@@ -96,7 +95,7 @@ def random_slice(self, c, f0, spec, audio_norm, spk, uv, volume):
9695
# print("skip too short audio:", filename)
9796
# return None
9897

99-
if random.choice([True, False]) and self.vol_aug and volume!=None:
98+
if random.choice([True, False]) and self.vol_aug and volume is not None:
10099
max_amp = float(torch.max(torch.abs(audio_norm))) + 1e-5
101100
max_shift = min(1, np.log10(1/max_amp))
102101
log10_vol_shift = random.uniform(-1, max_shift)
@@ -114,7 +113,7 @@ def random_slice(self, c, f0, spec, audio_norm, spk, uv, volume):
114113
end = start + 790
115114
spec, c, f0, uv = spec[:, start:end], c[:, start:end], f0[start:end], uv[start:end]
116115
audio_norm = audio_norm[:, start * self.hop_length : end * self.hop_length]
117-
if volume !=None:
116+
if volume is not None:
118117
volume = volume[start:end]
119118
return c, f0, spec, audio_norm, spk, uv,volume
120119

@@ -178,7 +177,7 @@ def __call__(self, batch):
178177
uv = row[5]
179178
uv_padded[i, :uv.size(0)] = uv
180179
volume = row[6]
181-
if volume != None:
180+
if volume is not None:
182181
volume_padded[i, :volume.size(0)] = volume
183182
else :
184183
volume_padded = None

diffusion/data_loaders.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import os
22
import random
3-
import re
4-
import numpy as np
3+
54
import librosa
5+
import numpy as np
66
import torch
7-
import random
8-
from utils import repeat_expand_2d
9-
from tqdm import tqdm
107
from torch.utils.data import Dataset
8+
from tqdm import tqdm
9+
10+
from utils import repeat_expand_2d
11+
1112

1213
def traverse_dir(
1314
root_dir,
@@ -130,7 +131,6 @@ def __init__(
130131
with open(filelists,"r") as f:
131132
self.paths = f.read().splitlines()
132133
for name_ext in tqdm(self.paths, total=len(self.paths)):
133-
name = os.path.splitext(name_ext)[0]
134134
path_audio = name_ext
135135
duration = librosa.get_duration(filename = path_audio, sr = self.sample_rate)
136136

diffusion/diffusion.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from collections import deque
22
from functools import partial
33
from inspect import isfunction
4-
import torch.nn.functional as F
5-
import librosa.sequence
4+
65
import numpy as np
76
import torch
7+
import torch.nn.functional as F
88
from torch import nn
99
from tqdm import tqdm
1010

@@ -26,8 +26,10 @@ def extract(a, t, x_shape):
2626

2727

2828
def noise_like(shape, device, repeat=False):
29-
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
30-
noise = lambda: torch.randn(shape, device=device)
29+
def repeat_noise():
30+
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
31+
def noise():
32+
return torch.randn(shape, device=device)
3133
return repeat_noise() if repeat else noise()
3234

3335

@@ -253,7 +255,11 @@ def forward(self,
253255

254256
if method is not None and infer_speedup > 1:
255257
if method == 'dpm-solver' or method == 'dpm-solver++':
256-
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
258+
from .dpm_solver_pytorch import (
259+
DPM_Solver,
260+
NoiseScheduleVP,
261+
model_wrapper,
262+
)
257263
# 1. Define the noise schedule.
258264
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
259265

@@ -331,7 +337,7 @@ def wrapped(x, t, **kwargs):
331337
infer_speedup, cond=cond
332338
)
333339
elif method == 'unipc':
334-
from .uni_pc import NoiseScheduleVP, model_wrapper, UniPC
340+
from .uni_pc import NoiseScheduleVP, UniPC, model_wrapper
335341
# 1. Define the noise schedule.
336342
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
337343

diffusion/diffusion_onnx.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1+
import math
12
from collections import deque
23
from functools import partial
34
from inspect import isfunction
4-
import torch.nn.functional as F
5-
import librosa.sequence
5+
66
import numpy as np
7-
from torch.nn import Conv1d
8-
from torch.nn import Mish
97
import torch
8+
import torch.nn.functional as F
109
from torch import nn
10+
from torch.nn import Conv1d, Mish
1111
from tqdm import tqdm
12-
import math
1312

1413

1514
def exists(x):
@@ -27,8 +26,10 @@ def extract(a, t):
2726

2827

2928
def noise_like(shape, device, repeat=False):
30-
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
31-
noise = lambda: torch.randn(shape, device=device)
29+
def repeat_noise():
30+
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
31+
def noise():
32+
return torch.randn(shape, device=device)
3233
return repeat_noise() if repeat else noise()
3334

3435

@@ -389,7 +390,11 @@ def org_forward(self,
389390

390391
if method is not None and infer_speedup > 1:
391392
if method == 'dpm-solver':
392-
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
393+
from .dpm_solver_pytorch import (
394+
DPM_Solver,
395+
NoiseScheduleVP,
396+
model_wrapper,
397+
)
393398
# 1. Define the noise schedule.
394399
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
395400

@@ -576,9 +581,6 @@ def forward(self, condition=None, init_noise=None, pndms=None, k_step=None):
576581
plms_noise_stage = torch.tensor(0, dtype=torch.long, device=device)
577582
noise_list = torch.zeros((0, 1, 1, self.mel_bins, n_frames), device=device)
578583

579-
ot = step_range[0]
580-
ot_1 = torch.full((1,), ot, device=device, dtype=torch.long)
581-
582584
for t in step_range:
583585
t_1 = torch.full((1,), t, device=device, dtype=torch.long)
584586
noise_pred = self.denoise_fn(x, t_1, cond)

diffusion/dpm_solver_pytorch.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import torch
2-
import torch.nn.functional as F
3-
import math
42

53

64
class NoiseScheduleVP:
@@ -559,7 +557,6 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal
559557
x_t: A pytorch tensor. The approximated solution at time `t`.
560558
"""
561559
ns = self.noise_schedule
562-
dims = x.dim()
563560
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
564561
h = lambda_t - lambda_s
565562
log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t)
@@ -984,20 +981,25 @@ def dpm_solver_adaptive(self, x, order, t_T, t_0, h_init=0.05, atol=0.0078, rtol
984981
nfe = 0
985982
if order == 2:
986983
r1 = 0.5
987-
lower_update = lambda x, s, t: self.dpm_solver_first_update(x, s, t, return_intermediate=True)
988-
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
984+
def lower_update(x, s, t):
985+
return self.dpm_solver_first_update(x, s, t, return_intermediate=True)
986+
def higher_update(x, s, t, **kwargs):
987+
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
989988
elif order == 3:
990989
r1, r2 = 1. / 3., 2. / 3.
991-
lower_update = lambda x, s, t: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
992-
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
990+
def lower_update(x, s, t):
991+
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
992+
def higher_update(x, s, t, **kwargs):
993+
return self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
993994
else:
994995
raise ValueError("For adaptive step size solver, order must be 2 or 3, got {}".format(order))
995996
while torch.abs((s - t_0)).mean() > t_err:
996997
t = ns.inverse_lambda(lambda_s + h)
997998
x_lower, lower_noise_kwargs = lower_update(x, s, t)
998999
x_higher = higher_update(x, s, t, **lower_noise_kwargs)
9991000
delta = torch.max(torch.ones_like(x).to(x) * atol, rtol * torch.max(torch.abs(x_lower), torch.abs(x_prev)))
1000-
norm_fn = lambda v: torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
1001+
def norm_fn(v):
1002+
return torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
10011003
E = norm_fn((x_higher - x_lower) / delta).max()
10021004
if torch.all(E <= 1.):
10031005
x = x_higher

diffusion/infer_gt_mel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import numpy as np
21
import torch
32
import torch.nn.functional as F
3+
44
from diffusion.unit2mel import load_model_vocoder
55

66

diffusion/logger/saver.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22
author: wayn391@mastertones
33
'''
44

5+
import datetime
56
import os
6-
import json
77
import time
8-
import yaml
9-
import datetime
10-
import torch
8+
119
import matplotlib.pyplot as plt
12-
from . import utils
10+
import torch
11+
import yaml
1312
from torch.utils.tensorboard import SummaryWriter
1413

14+
1515
class Saver(object):
1616
def __init__(
1717
self,

0 commit comments

Comments
 (0)