Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions TypeaheadAI.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
2B473E872AA85B9A0042913D /* IncognitoModeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B473E862AA85B9A0042913D /* IncognitoModeView.swift */; };
2B473E892AA85BDD0042913D /* LlamaModelManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B473E882AA85BDD0042913D /* LlamaModelManager.swift */; };
2B473E8C2AA860380042913D /* MenuBarExtraAccess in Frameworks */ = {isa = PBXBuildFile; productRef = 2B473E8B2AA860380042913D /* MenuBarExtraAccess */; };
2B5C2C832AB79EB800072D71 /* SpecialRecordActor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B5C2C822AB79EB800072D71 /* SpecialRecordActor.swift */; };
2B5C2C852AB7A0D100072D71 /* TranscriptionManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B5C2C842AB7A0D100072D71 /* TranscriptionManager.swift */; };
2B8B952B2A9C528B00FB9EA9 /* ScriptManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8B952A2A9C528B00FB9EA9 /* ScriptManager.swift */; };
2B92BDB92AA3A2DD00E65CFA /* CustomModalWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */; };
2B92BDBB2AA3D10800E65CFA /* ModalManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */; };
Expand Down Expand Up @@ -81,6 +83,8 @@
2B3FAC222AAAF44D00B2D405 /* LlamaWrapper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaWrapper.swift; sourceTree = "<group>"; };
2B473E862AA85B9A0042913D /* IncognitoModeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = IncognitoModeView.swift; sourceTree = "<group>"; };
2B473E882AA85BDD0042913D /* LlamaModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaModelManager.swift; sourceTree = "<group>"; };
2B5C2C822AB79EB800072D71 /* SpecialRecordActor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpecialRecordActor.swift; sourceTree = "<group>"; };
2B5C2C842AB7A0D100072D71 /* TranscriptionManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptionManager.swift; sourceTree = "<group>"; };
2B8B952A2A9C528B00FB9EA9 /* ScriptManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ScriptManager.swift; sourceTree = "<group>"; };
2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomModalWindow.swift; sourceTree = "<group>"; };
2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalManager.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -155,6 +159,7 @@
2B2745092AB01CF400F37D3E /* SpecialSaveActor.swift */,
2B27450F2AB03A3D00F37D3E /* CanSimulateCopy.swift */,
2BF929862AB16C4C00FC105B /* ResponseParsingTask.swift */,
2B5C2C822AB79EB800072D71 /* SpecialRecordActor.swift */,
);
path = Actors;
sourceTree = "<group>";
Expand Down Expand Up @@ -224,6 +229,7 @@
2B473E882AA85BDD0042913D /* LlamaModelManager.swift */,
2B27450D2AB0380C00F37D3E /* AppContextManager.swift */,
2BF929812AB13F7900FC105B /* MarkdownAttributedStringParser.swift */,
2B5C2C842AB7A0D100072D71 /* TranscriptionManager.swift */,
);
path = TypeaheadAI;
sourceTree = "<group>";
Expand Down Expand Up @@ -455,10 +461,12 @@
2BCF843A2A9DE6DA00359841 /* GeneralSettingsView.swift in Sources */,
2B33D87D2AAC3330001193A2 /* ProfileView.swift in Sources */,
2BA7F0B52A9ABCD7003D38BA /* PromptManager.swift in Sources */,
2B5C2C852AB7A0D100072D71 /* TranscriptionManager.swift in Sources */,
2BF929872AB16C4C00FC105B /* ResponseParsingTask.swift in Sources */,
2B3FAC232AAAF44D00B2D405 /* LlamaWrapper.swift in Sources */,
2B3FAC212AAAF22500B2D405 /* LlamaWrapper.cpp in Sources */,
2BA3C2372AADAD9A00537F95 /* SpecialCopyActor.swift in Sources */,
2B5C2C832AB79EB800072D71 /* SpecialRecordActor.swift in Sources */,
2B27450E2AB0380C00F37D3E /* AppContextManager.swift in Sources */,
2B27450A2AB01CF400F37D3E /* SpecialSaveActor.swift in Sources */,
2BA7F0792A9ABBA8003D38BA /* TypeaheadAIApp.swift in Sources */,
Expand Down Expand Up @@ -653,6 +661,8 @@
INFOPLIST_KEY_LSUIElement = YES;
INFOPLIST_KEY_NSAppleEventsUsageDescription = "We need to control Google Chrome to get the URL of the active tab.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "We need access to the microphone to support voice input";
INFOPLIST_KEY_NSSpeechRecognitionUsageDescription = "We need access to the speech recognition toolkit so that the user can interface with the LLM through voice input";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
Expand Down Expand Up @@ -693,6 +703,8 @@
INFOPLIST_KEY_LSUIElement = YES;
INFOPLIST_KEY_NSAppleEventsUsageDescription = "We need to control Google Chrome to get the URL of the active tab.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "We need access to the microphone to support voice input";
INFOPLIST_KEY_NSSpeechRecognitionUsageDescription = "We need access to the speech recognition toolkit so that the user can interface with the LLM through voice input";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<key>TypeaheadAI.xcscheme_^#shared#^_</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
<integer>1</integer>
</dict>
</dict>
</dict>
Expand Down
1 change: 1 addition & 0 deletions TypeaheadAI/Actors/SpecialCutActor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class ClipboardMonitor {
}

/// Stops clipboard polling by tearing down the repeating timer.
func stopMonitoring() {
    logger.debug("stop monitoring")
    // Invalidate (no-op when no timer is active) and drop the reference so
    // a future startMonitoring creates a fresh timer.
    if let activeTimer = timer {
        activeTimer.invalidate()
    }
    timer = nil
}
Expand Down
27 changes: 27 additions & 0 deletions TypeaheadAI/Actors/SpecialRecordActor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//
// SpecialRecordActor.swift
// TypeaheadAI
//
// Created by Jeff Hara on 9/17/23.
//

import Foundation
import AVFoundation
import Speech
import os.log

/// Actor that will own the voice-recording flow.
/// NOTE(review): currently a stub — the pipeline below is declared but not
/// yet wired up; `specialRecord()` does nothing.
actor SpecialRecordActor {
    private let logger = Logger(
        subsystem: "ai.typeahead.TypeaheadAI",
        category: "SpecialRecordActor"
    )

    // Speech-capture pipeline, mirroring TranscriptionManager's members.
    private let audioEngine = AVAudioEngine()
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Entry point for the special-record action. Intentionally empty for now.
    func specialRecord() {
    }
}
10 changes: 10 additions & 0 deletions TypeaheadAI/ModalManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class ModalManager: ObservableObject {

// TODO: Inject?
var clientManager: ClientManager? = nil
var transcriptionManager: TranscriptionManager? = TranscriptionManager()

var toastWindow: CustomModalWindow?

Expand Down Expand Up @@ -377,6 +378,15 @@ class ModalManager: ObservableObject {
}
}

@MainActor
/// Cancels any in-progress voice recording. Safe to call when no
/// transcription manager is configured (does nothing in that case).
func cancelRecordingTask() {
    guard let manager = transcriptionManager else { return }
    manager.stopRecording()
}

/// Begins a voice recording session, forwarding to the transcription manager.
/// - Parameter completion: Receives each transcription the recognizer produces.
///   Never called when no transcription manager is configured.
func startRecording(completion: @escaping (String) -> Void) {
    guard let manager = transcriptionManager else { return }
    manager.startRecording(completion: completion)
}

@objc func windowDidMove(_ notification: Notification) {
if let movedWindow = notification.object as? NSWindow {
let origin = movedWindow.frame.origin
Expand Down
140 changes: 140 additions & 0 deletions TypeaheadAI/TranscriptionManager.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
//
// TranscriptionManager.swift
// TypeaheadAI
//
// Created by Jeff Hara on 9/17/23.
//

import Foundation
import AVFoundation
import Speech
import os.log

/// Errors raised by `TranscriptionManager`.
///
/// Conforms to `LocalizedError` so that a generically-typed
/// `error.localizedDescription` (as used in `startRecording`'s catch block)
/// surfaces the custom message instead of the generic NSError-bridged
/// "The operation couldn't be completed" text. The explicit
/// `localizedDescription` property is kept for callers that access it on the
/// concrete enum type.
enum TranscriptionManagerError: Error, LocalizedError {
    case notAuthorized
    case illegalState

    // Used by the LocalizedError bridging path.
    var errorDescription: String? {
        return localizedDescription
    }

    var localizedDescription: String {
        switch self {
        case .notAuthorized:
            return "The user has not authorized speech recognition"
        case .illegalState:
            return "Speech recognizer is not available"
        }
    }
}

/// Owns the microphone-capture + speech-recognition pipeline: requests
/// authorization, streams audio buffers into an `SFSpeechRecognizer`, and
/// reports transcriptions back through a completion callback.
class TranscriptionManager {
    private let logger = Logger(
        subsystem: "ai.typeahead.TypeaheadAI",
        category: "TranscriptionManager"
    )

    // Per-session pipeline state: created in setupRecording(), torn down in
    // stopRecording(). All optional so a session can be fully released.
    private var audioEngine: AVAudioEngine?
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Requests speech-recognition authorization and, if granted, begins
    /// recording and recognizing microphone audio.
    /// - Parameter completion: Invoked with `bestTranscription.formattedString`
    ///   each time the recognizer delivers a result.
    func startRecording(completion: @escaping (String) -> Void) {
        // Cancel any recognition still in flight from a previous session.
        self.recognitionTask?.cancel()
        self.recognitionTask = nil

        requestSpeechAuthorization() { [weak self] result in
            switch result {
            case .success():
                do {
                    try self?.setupRecording(completion: completion)
                } catch {
                    self?.logger.error("\(error.localizedDescription)")
                }
            case .failure(let error):
                self?.logger.error("Authorization failed with error: \(error.localizedDescription)")
            }
        }
    }

    /// Builds a fresh engine/recognizer pair, installs a microphone tap, and
    /// starts a recognition task on the main queue.
    /// - Throws: `TranscriptionManagerError.illegalState` when the engine has
    ///   no input node or fails to start.
    private func setupRecording(completion: @escaping (String) -> Void) throws {
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

        audioEngine = AVAudioEngine()
        speechRecognizer = SFSpeechRecognizer()

        guard let inputNode = audioEngine?.inputNode else {
            self.logger.error("AudioEngine is not initialized")
            throw TranscriptionManagerError.illegalState
        }

        // Clear any stale tap left over from an earlier session before
        // installing ours, otherwise installTap can raise.
        inputNode.reset()
        inputNode.removeTap(onBus: 0)

        // Record in mono float32 at the hardware's native sample rate.
        let hardwareFormat = inputNode.inputFormat(forBus: 0)
        let recordingFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: hardwareFormat.sampleRate, channels: 1, interleaved: false)

        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] (buffer, _) in
            self?.recognitionRequest?.append(buffer)
        }

        audioEngine?.prepare()
        do {
            try audioEngine?.start()
        } catch {
            logger.error("Audio engine failed to start: \(error.localizedDescription)")
            throw TranscriptionManagerError.illegalState
        }

        DispatchQueue.main.async { [weak self] in
            // Guard instead of force-unwrapping the request: stopRecording()
            // may have cleared it before this block runs, which previously
            // crashed. Weak capture also avoids keeping the manager alive
            // through the dispatch hop.
            guard let self = self, let recognitionRequest = self.recognitionRequest else { return }

            self.recognitionTask = self.speechRecognizer?.recognitionTask(with: recognitionRequest) { [weak self] (result, error) in
                guard let self = self else { return }

                // kAFAssistantErrorDomain code 216 is the expected error for a
                // cancelled task; report it as info, not a failure.
                if let error = error as NSError?, error.domain == "kAFAssistantErrorDomain" && error.code == 216 {
                    self.logger.info("Recognition task was cancelled")
                } else if let error = error {
                    self.logger.error("Recognition task failed with error: \(error.localizedDescription)")
                } else if let transcription = result?.bestTranscription {
                    // NOTE(review): this logs the user's spoken text; consider
                    // os_log privacy markers if transcripts are sensitive.
                    self.logger.info("Recognized text: \(transcription.formattedString)")
                    completion(transcription.formattedString)
                } else {
                    self.logger.info("No recognition result available")
                }
            }
        }

        logger.info("Successfully started recording")
    }

    /// Tears down the recording session and releases every pipeline object.
    ///
    /// When using Bluetooth headphones, the output audio quality drops if the microphone is enabled.
    /// That can't be helped, but make sure to test with BT headphones that when the recording stops
    /// the audio quality returns to normal. Could be a sign that something wasn't cleaned up properly.
    func stopRecording() {
        recognitionTask?.cancel()
        recognitionTask = nil
        audioEngine?.stop()
        audioEngine?.inputNode.reset()
        audioEngine?.inputNode.removeTap(onBus: 0)
        audioEngine = nil
        speechRecognizer = nil
        // endAudio() lets the recognizer finish any buffered audio gracefully.
        recognitionRequest?.endAudio()
        recognitionRequest = nil
    }

    /// Asks the system for speech-recognition authorization and reports the
    /// outcome on the main queue.
    /// - Parameter completion: `.success` when authorized and a recognizer is
    ///   available; `.failure(.notAuthorized)` or `.failure(.illegalState)` otherwise.
    private func requestSpeechAuthorization(completion: @escaping (Result<Void, TranscriptionManagerError>) -> Void) {
        SFSpeechRecognizer.requestAuthorization { [weak self] authStatus in
            OperationQueue.main.addOperation {
                switch authStatus {
                case .authorized:
                    self?.speechRecognizer = SFSpeechRecognizer()
                    if self?.speechRecognizer?.isAvailable ?? false {
                        completion(.success(()))
                    } else {
                        self?.logger.error("\(TranscriptionManagerError.illegalState.localizedDescription)")
                        completion(.failure(.illegalState))
                    }
                default:
                    self?.logger.error("\(TranscriptionManagerError.notAuthorized.localizedDescription)")
                    completion(.failure(.notAuthorized))
                }
            }
        }
    }
}
2 changes: 2 additions & 0 deletions TypeaheadAI/TypeaheadAI.entitlements
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
<true/>
<key>com.apple.security.automation.apple-events</key>
<true/>
<key>com.apple.security.device.audio-input</key>
<true/>
<key>com.apple.security.files.user-selected.read-write</key>
<true/>
<key>com.apple.security.network.client</key>
Expand Down
Loading