-
-
Notifications
You must be signed in to change notification settings - Fork 96
/
Copy pathwhisper.lua
377 lines (329 loc) · 8.89 KB
/
whisper.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
--------------------------------------------------------------------------------
-- Whisper module for transcribing speech
--------------------------------------------------------------------------------
local uv = vim.uv or vim.loop
local logger = require("gp.logger")
local tasker = require("gp.tasker")
local render = require("gp.render")
local helpers = require("gp.helper")
local vault = require("gp.vault")
local default_config = require("gp.config")
-- Module table for the whisper integration; this is the value the file returns.
local W = {
-- Effective whisper configuration. W.setup replaces it with a deep copy of
-- the defaults and then applies the user options on top.
-- NOTE(review): the line below has only two dashes, so it is a plain comment
-- rather than a LuaLS annotation — confirm the intended type annotation.
--@class GpConfig
config = {},
-- Command handlers keyed by command name; W.setup registers each of them
-- as a user command named W.config.cmd_prefix .. <key>.
cmd = {},
-- Set to true by W.setup when the user config has `disable` set.
disabled = false,
}
--- Configure the whisper module and register its user commands.
---
--- Starts from a deep copy of the default whisper config and overlays the
--- user options key by key (shallow merge: a user value replaces the default
--- value wholesale). `store_dir` is then passed through helpers.prepare_dir
--- and one user command is created per entry of W.cmd.
--- When `opts.disable` is truthy the module is only flagged as disabled and
--- nothing else is set up.
---@param opts table | nil # user config (nil is treated as an empty table)
W.setup = function(opts)
	logger.debug("whisper setup started\n" .. vim.inspect(opts))

	-- tolerate a missing config table instead of raising on `opts.disable`
	opts = opts or {}

	W.config = vim.deepcopy(default_config.whisper)

	if opts.disable then
		W.disabled = true
		logger.debug("whisper is disabled")
		return
	end

	for key, value in pairs(opts) do
		W.config[key] = value
	end

	W.config.store_dir = helpers.prepare_dir(W.config.store_dir, "whisper store")

	-- expose every handler in W.cmd as <cmd_prefix><name>
	for name, handler in pairs(W.cmd) do
		helpers.create_user_command(W.config.cmd_prefix .. name, handler)
	end

	logger.debug("whisper setup finished")
end
--- Record audio from the microphone and, on confirmation, transcribe it.
---
--- Flow, as implemented below:
---   1. a popup with animated instructions is opened; <esc>/<C-c> call
---      tasker.stop(), <cr> marks the recording for transcription and calls
---      tasker.stop() 300 ms later
---   2. a recorder (sox / arecord / ffmpeg, or the one from W.config.rec_cmd)
---      is started through tasker.run, writing to <store_dir>/rec.wav
---   3. when the recorder exits with its expected code and <cr> was pressed,
---      a bash pipeline (sox normalisation, silence removal, curl upload) is
---      run and the "text" field of the JSON response is passed to `callback`
---
--- Failures are reported through logger.error; `callback` is not invoked then.
---@param callback function # callback function(text)
---@param language string | nil # language code
local whisper = function(callback, language)
	language = language or W.config.language

	-- make sure sox is installed
	if vim.fn.executable("sox") == 0 then
		logger.error("sox is not installed")
		return
	end

	-- get the secret for whisper. It can be empty as well, in case of running local whisper server
	local bearer = W.config.secret or vault.get_secret("openai_api_key")

	-- get the model.
	local model = W.config.model or "whisper-1"

	local rec_file = W.config.store_dir .. "/rec.wav"

	-- Known recorders. The "rec.wav" placeholder in `opts` is replaced by
	-- rec_file further down; `exit_code` is the exit status that is accepted
	-- as a successful end of the recording for that tool.
	local rec_options = {
		sox = {
			cmd = "sox",
			opts = {
				"-c",
				"1",
				"--buffer",
				"32",
				"-d",
				"rec.wav",
				"trim",
				"0",
				"3600",
			},
			exit_code = 0,
		},
		arecord = {
			cmd = "arecord",
			opts = {
				"-c",
				"1",
				"-f",
				"S16_LE",
				"-r",
				"48000",
				"-d",
				3600,
				"rec.wav",
			},
			exit_code = 1,
		},
		ffmpeg = {
			cmd = "ffmpeg",
			opts = {
				"-y",
				"-f",
				"avfoundation",
				"-i",
				":0",
				"-t",
				"3600",
				"rec.wav",
			},
			exit_code = 255,
		},
	}

	local gid = helpers.create_augroup("GpWhisper", { clear = true })

	-- create popup
	local buf, _, close_popup, _ = render.popup(
		nil,
		W.config.cmd_prefix .. " Whisper",
		function(w, h)
			return 60, 12, (h - 12) * 0.4, (w - 60) * 0.5
		end,
		{ gid = gid, on_leave = false, escape = false, persist = false },
		{ border = W.config.style_popup_border or "single" }
	)

	-- animated instructions in the popup
	local counter = 0
	local timer = uv.new_timer()
	timer:start(
		0,
		200,
		vim.schedule_wrap(function()
			if vim.api.nvim_buf_is_valid(buf) then
				vim.api.nvim_buf_set_lines(buf, 0, -1, false, {
					"    ",
					"    Speak 👄 loudly 📣 into the microphone 🎤: ",
					"    " .. string.rep("👂", counter),
					"    ",
					"    Pressing <Enter> starts the transcription.",
					"    ",
					"    Cancel the recording with <esc>/<C-c> or :GpStop.",
					"    ",
					-- show the directory that is really used instead of a fixed path
					"    The last recording is in " .. W.config.store_dir .. "/.",
				})
			end
			-- the row of ears grows each tick and starts over after 21
			counter = counter + 1
			if counter % 22 == 0 then
				counter = 0
			end
		end)
	)

	-- Tear down timer, popup and augroup and stop running tasks.
	-- NOTE(review): tasker.once presumably makes this body run at most once — confirm in gp.tasker.
	local close = tasker.once(function()
		if timer then
			timer:stop()
			timer:close()
		end
		close_popup()
		vim.api.nvim_del_augroup_by_id(gid)
		tasker.stop()
	end)

	helpers.set_keymap({ buf }, { "n", "i", "v" }, "<esc>", function()
		tasker.stop()
	end)

	helpers.set_keymap({ buf }, { "n", "i", "v" }, "<C-c>", function()
		tasker.stop()
	end)

	-- set by <cr>; decides whether the finished recording gets transcribed
	local continue = false
	helpers.set_keymap({ buf }, { "n", "i", "v" }, "<cr>", function()
		continue = true
		-- stop with a 300 ms delay (presumably to keep the tail of the speech — confirm)
		vim.defer_fn(function()
			tasker.stop()
		end, 300)
	end)

	-- cleanup on buffer exit
	helpers.autocmd({ "BufWipeout", "BufHidden", "BufDelete" }, { buf }, close, gid)

	local curl_params = W.config.curl_params or {}
	local curl = "curl" .. " " .. table.concat(curl_params, " ")

	-- transcribe the recording
	local transcribe = function()
		local cd_cmd = "cd " .. W.config.store_dir

		local export_lc_numeric_cmd = "export LC_NUMERIC='C'"

		-- normalise the recording into norm.wav
		local sox_norm_cmd = "sox --norm=-3 rec.wav norm.wav"

		-- shell variable t = "RMS lev dB" reported by sox, scaled by W.config.silence
		local sox_silence_t = "t=$(sox 'norm.wav' -n channels 1 stats 2>&1 | grep 'RMS lev dB' "
			.. " | sed -e 's/.* //' | awk '{print $1*"
			.. W.config.silence
			.. "}')"

		-- write final.mp3 with silence below $t dB removed and the tempo adjusted
		local remove_silence_cmd = "sox -q norm.wav -C 196.5 final.mp3 silence -l 1 0.05 $t'dB' -1 1.0 $t'dB' pad 0.1 0.1 tempo "
			.. W.config.tempo

		-- the secret may be absent (nil) or empty, e.g. with a local whisper
		-- server; in both cases no Authorization header is sent
		local curl_bearer_header = ""
		if bearer and bearer ~= "" then
			curl_bearer_header = '-H "Authorization: Bearer ' .. bearer .. '" '
		end

		local curl_cmd = curl
			.. " --max-time 20 "
			.. W.config.endpoint
			.. " -s "
			.. curl_bearer_header
			.. '-H "Content-Type: multipart/form-data" '
			.. '-F model="'
			.. model
			.. '" -F "language='
			.. language
			-- upload the file produced by remove_silence_cmd above
			.. '" -F "file=@final.mp3" '
			.. '-F response_format="json"'

		local cmd = cd_cmd
			.. " && "
			.. export_lc_numeric_cmd
			.. " && "
			.. sox_norm_cmd
			.. " && "
			.. sox_silence_t
			.. " && "
			.. remove_silence_cmd
			.. " && "
			.. curl_cmd

		tasker.run(nil, "bash", { "-c", cmd }, function(code, signal, stdout, _)
			if code ~= 0 then
				logger.error(string.format("Whisper query exited: %d, %d", code, signal))
				return
			end

			-- NOTE(review): 11 bytes is the length of a minimal {"text":""} payload — presumably the reason for this threshold
			if not stdout or stdout == "" or #stdout < 11 then
				logger.error("Whisper query, no stdout: " .. vim.inspect(stdout))
				return
			end

			-- the response may be malformed JSON or lack a string "text" field;
			-- report that instead of raising inside the callback
			local ok, decoded = pcall(vim.json.decode, stdout)
			local text = ok and type(decoded) == "table" and decoded.text or nil
			if type(text) ~= "string" then
				logger.error("Whisper query, no text: " .. vim.inspect(stdout))
				return
			end

			-- flatten to a single line and drop trailing whitespace
			text = table.concat(vim.split(text, "\n"), " ")
			text = text:gsub("%s+$", "")

			if callback then
				callback(text)
			end
		end)
	end

	local cmd = {}

	local rec_cmd = W.config.rec_cmd
	-- if rec_cmd not set explicitly, try to autodetect
	if not rec_cmd then
		rec_cmd = "sox"
		if vim.fn.executable("ffmpeg") == 1 then
			local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l")
			devices = string.gsub(devices, "^%s*(.-)%s*$", "%1")
			if devices == "1" then
				rec_cmd = "ffmpeg"
			end
		end
		-- arecord, when present, wins over both sox and ffmpeg
		if vim.fn.executable("arecord") == 1 then
			rec_cmd = "arecord"
		end
	end

	if type(rec_cmd) == "table" and rec_cmd[1] and rec_options[rec_cmd[1]] then
		-- custom command line: first element names the tool, the rest are its arguments
		rec_cmd = vim.deepcopy(rec_cmd)
		cmd.cmd = table.remove(rec_cmd, 1)
		cmd.exit_code = rec_options[cmd.cmd].exit_code
		cmd.opts = rec_cmd
	elseif type(rec_cmd) == "string" and rec_options[rec_cmd] then
		cmd = rec_options[rec_cmd]
	else
		-- render tables readably; a bare %s would only show a table address at best
		local shown = type(rec_cmd) == "table" and vim.inspect(rec_cmd) or tostring(rec_cmd)
		logger.error(string.format("Whisper got invalid recording command: %s", shown))
		close()
		return
	end

	-- point the recorder at the real output file
	for i, v in ipairs(cmd.opts) do
		if v == "rec.wav" then
			cmd.opts[i] = rec_file
		end
	end

	tasker.run(nil, cmd.cmd, cmd.opts, function(code, signal, stdout, stderr)
		close()

		if code and code ~= cmd.exit_code then
			logger.error(
				cmd.cmd
					.. " exited with code and signal:\ncode: "
					.. code
					.. ", signal: "
					.. signal
					.. "\nstdout: "
					.. vim.inspect(stdout)
					.. "\nstderr: "
					.. vim.inspect(stderr)
			)
			return
		end

		-- recording was cancelled rather than confirmed with <cr>
		if not continue then
			return
		end

		vim.schedule(function()
			transcribe()
		end)
	end)
end
--- Public entry point for a recording + transcription run.
---
--- Hands the actual work to the local `whisper` function, wrapped in
--- vault.run_with_secret for the "openai_api_key" secret.
--- NOTE(review): presumably run_with_secret invokes the function once the
--- secret is available — confirm in gp.vault.
---@param callback function # callback function(text)
---@param language string | nil # language code
function W.Whisper(callback, language)
	local function start_recording()
		whisper(callback, language)
	end

	vault.run_with_secret("openai_api_key", start_recording)
end
--- Handler behind the Whisper user command.
---
--- Starts a recording and writes the transcribed text into the buffer that
--- was current when the command was invoked. Without a range the cursor line
--- is replaced; with a range (`params.range == 2`) lines line1..line2 are
--- replaced. The first space-separated argument, when non-empty, is used as
--- the language instead of W.config.language.
---@param params table # user command arguments (reads .range, .line1, .line2, .args)
function W.cmd.Whisper(params)
	local target_buf = vim.api.nvim_get_current_buf()

	-- default target: the line under the cursor
	local first_line = vim.api.nvim_win_get_cursor(0)[1]
	local last_line = first_line
	if params.range == 2 then
		first_line, last_line = params.line1, params.line2
	end

	-- an empty first argument means "use the configured language"
	local language = vim.split(params.args, " ")[1]
	if language == "" then
		language = W.config.language
	end

	local function put_text(text)
		-- the buffer may be gone by the time the transcription arrives
		if text and vim.api.nvim_buf_is_valid(target_buf) then
			vim.api.nvim_buf_set_lines(target_buf, first_line - 1, last_line, false, { text })
		end
	end

	W.Whisper(put_text, language)
end
--- Report the state of the whisper tooling through vim.health.
---
--- Emits a warning and stops when the module is disabled. Otherwise checks
--- that sox is installed and that its help output mentions mp3, and then
--- reports which recorder was found (arecord, or ffmpeg with avfoundation).
W.check_health = function()
	if W.disabled then
		vim.health.warn("whisper is disabled")
		return
	end

	if vim.fn.executable("sox") == 1 then
		vim.health.ok("sox is installed")
		local output = vim.fn.system("sox -h | grep -i mp3 | wc -l 2>/dev/null")
		-- `wc -l` may pad the count with leading blanks (e.g. BSD/macOS wc), so
		-- trim before comparing, exactly like the ffmpeg check below does;
		-- testing only the first character would miss a padded "0"
		local mp3_lines = string.gsub(output, "^%s*(.-)%s*$", "%1")
		if mp3_lines == "0" then
			vim.health.error("sox is not compiled with mp3 support" .. "\n  on debian/ubuntu install libsox-fmt-mp3")
		else
			vim.health.ok("sox is compiled with mp3 support")
		end
	else
		vim.health.warn("sox is not installed")
	end

	if vim.fn.executable("arecord") == 1 then
		vim.health.ok("arecord found - will be used for recording (sox for post-processing)")
	elseif vim.fn.executable("ffmpeg") == 1 then
		local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l")
		devices = string.gsub(devices, "^%s*(.-)%s*$", "%1")
		if devices == "1" then
			vim.health.ok("ffmpeg with avfoundation found - will be used for recording (sox for post-processing)")
		end
	end
end
-- hand the module table to whoever require()s this file
return W