This repository has been archived by the owner on May 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathQTTS.py
200 lines (164 loc) · 7.43 KB
/
QTTS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from ctypes import *
from numpy import block
from Recorder import Recorder
from MSP_TYPES import *
from rich import print
import traceback
from utils import *
from MSP_CMN import MSP_CMN
from params import args
VOICE_NAME = 'xiaoyan'
PUREXTTS_RES_PATH = "fo|res/xtts/{}.jet;fo|res/xtts/common.jet".format(VOICE_NAME)
TTS_RES_PATH = "fo|res/tts/{}.jet;fo|res/tts/common.jet".format(VOICE_NAME)
class QTTS(object):
def __init__(self, dll: CDLL, recorder: Recorder):
super().__init__()
self.dll = dll
self.recorder = recorder
self._session_valid = False
self.sessionID = c_void_p()
self.begin_params = { # U 通用, L 离线, O 在线
'engine_type': 'purextts', # U 引擎类型: purextts, local, cloud
'voice_name': VOICE_NAME, # U 发言人
'speed': 50, # U 语速
'volumn': 50, # U 音量
'pitch': 50, # U 语调
'tts_res_path': PUREXTTS_RES_PATH, # L 合成资源路径
'rdn': 0, # U 数字发音
'rcn': 1, # L 1 的中文发音
'text_encoding': 'UTF8', # U 合成文本编码格式
'sample_rate': 16000, # U 合成音频采样率
'background_sound': 0, # O 合成音频中的背景音
'aue': 'speex-wb;7', # O 音频编码格式和压缩等级
'ttp': 'text', # O 文本类型
'speed_increase': 1, # L 语速增强
'effect': 0 # L 合成音效
}
self.set_arg_types()
self.set_res_type()
def set_arg_types(self):
self.dll.QTTSSessionBegin.argtypes = [c_char_p, POINTER(c_int)]
self.dll.QTTSTextPut.argtypes = [c_char_p, c_char_p, c_uint, c_char_p]
self.dll.QTTSAudioGet.argtypes = [c_char_p, POINTER(c_uint), POINTER(c_int), POINTER(c_int)]
self.dll.QTTSSessionEnd.argtypes = [c_char_p, c_char_p]
self.dll.QTTSGetParam.argtypes = [c_char_p, c_char_p, c_char_p, POINTER(c_int)]
def set_res_type(self):
self.dll.QTTSSessionBegin.restype = c_char_p
self.dll.QTTSAudioGet.restype = c_void_p
self.dll.QTTSGetParam.restype = c_char_p
def SessionBegin(self, params=None):
"""QTTSSessionBegin, 开始一次语音合成,分配语音合成资源。
Args:
params (dict or str, optional): QTTSSessionBegin 的参数. 可以传入字典或字符串,默认使用 self.begin_params
Raises:
RuntimeError: SessionBegin failed.
Returns:
bytes: sessionID
"""
if not params:
params = self.begin_params
if type(params) is dict:
params = params_str_from_dict(params)
if type(params) is str:
params = params.encode('utf8')
elif type(params) is bytes:
params = params
else:
raise TypeError("Wrong params type.")
error_code = c_int()
self.sessionID = self.dll.QTTSSessionBegin(params, byref(error_code))
if MSP_SUCCESS != error_code.value:
raise RuntimeError("QTTSSessionBegin failed, error code: %d" % error_code.value)
self._session_valid = True
return self.sessionID
def TextPut(self, text_string="你好,这是一段合成语音"):
"""QTTSTextPut, 写入要合成的文本。
Args:
text_string (str, optional): 需要合成的文本
Raises:
RuntimeError: QTTSTextPut failed
"""
assert text_string is not None, "TextPut 输入文本为空"
if type(text_string) is str:
text_string = text_string.encode('utf8')
text_len = self.dll.strlen(text_string)
ret = self.dll.QTTSTextPut(self.sessionID, text_string, text_len, None)
if MSP_SUCCESS != ret:
raise RuntimeError("QTTSTextPut failed, error code: %d" % ret)
def AudioGet(self):
"""QTTSAudioGet, 获取合成音频。不同于官方实现,该函数会获取完整的合成音频并一起返回。
Returns:
bytes: 完整的合成音频 (此时 synth_status 为 MSP_TTS_FLAG_DATA_END)
"""
audio_len = c_uint()
synth_status = c_int()
error_code = c_int()
data = c_void_p()
frames = []
while True:
data = self.dll.QTTSAudioGet(self.sessionID, byref(audio_len), byref(synth_status), byref(error_code))
if data is not None:
frames.append(read_charp_with_len(data, audio_len).raw)
if MSP_TTS_FLAG_DATA_END == synth_status.value:
break
return b''.join(frames)
def SessionEnd(self):
"""QTTSSessionEnd, 结束本次语音合成。
Raises:
RuntimeError: QTTSSessionEnd failed
"""
hints = "Done TTS"
hints = hints.encode('utf8')
ret = self.dll.QTTSSessionEnd(self.sessionID, hints)
if MSP_SUCCESS != ret:
raise RuntimeError("QTTSSessionEnd failed, errCode: %d" % ret)
self.sessionID = c_void_p()
self._session_valid = False
def GetParam(self, param_name=None):
"""QTTSGetparam, 获取当前语音合成信息,如当前合成音频对应文本结束位置、上行流量、下行流量等。
Args:
param_name (str, optional): paramName 参数名. Defaults to None.
Raises:
RuntimeError: QTTSGetparam failed
Returns:
str: 参数值
"""
# NOT WORKING!!!
assert param_name in ["sid", "upflow", "downflow", "ced"], "Wrong paramName"
param_name = param_name.encode('utf8')
param_value = (c_char * 32)()
valueLen = c_int(32)
ret = self.dll.QTTSGetParam(self.sessionID, param_name, param_value, byref(valueLen))
if MSP_SUCCESS != ret:
raise RuntimeError("QTTSGetParam failed, error code: %d" % ret)
return param_value.decode('utf8')
def say(self, text_string=None, blocking=False, output_file_path=None):
"""执行一次语音合成并通过扬声器播放合成音频
Args:
text_string (str, optional): 要合成的文本. Defaults to None.
blocking (bool, optional): 播放音频时是否阻塞交互. Defaults to True.
output_file_path (str, optional): 输出音频的文件名
"""
try:
self.SessionBegin()
self.TextPut(text_string)
self.recorder.abort()
audio = self.AudioGet()
self.recorder.play_buffer(audio, blocking=blocking)
if output_file_path is not None:
self.recorder.save_audio(output_file_path, audio, sample_rate=16000)
self.SessionEnd()
except (RuntimeError, ValueError) as e:
traceback.print_exc()
def __del__(self):
if self._session_valid:
try:
self.SessionEnd()
except RuntimeError as e:
traceback.print_exc()
if __name__ == '__main__':
msp_cmn = MSP_CMN()
msp_cmn.Login()
recorder = Recorder()
tts = QTTS(msp_cmn.dll, recorder)
tts.say(text_string=args.tts_text, output_file_path=args.output_audio_file, blocking=True)