@@ -1,7 +1,7 @@
 #import "LLM.h"
 #import <ExecutorchLib/LLaMARunner.h>
-#import "utils/ConversationManager.h"
-#import "utils/Constants.h"
+#import "utils/llms/ConversationManager.h"
+#import "utils/llms/Constants.h"
 #import "utils/Fetcher.h"
 #import "utils/LargeFileFetcher.h"
 #import <UIKit/UIKit.h>
@@ -47,77 +47,77 @@ - (void)onResult:(NSString *)token prompt:(NSString *)prompt {
 
 - (void)updateDownloadProgress:(NSNumber *)progress {
   dispatch_async(dispatch_get_main_queue(), ^{
-    [self emitOnDownloadProgress:progress];
+    [self emitOnDownloadProgress:progress];
   });
 }
 
 - (void)loadLLM:(NSString *)modelSource tokenizerSource:(NSString *)tokenizerSource systemPrompt:(NSString *)systemPrompt contextWindowLength:(double)contextWindowLength resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
-  NSURL *modelURL = [NSURL URLWithString:modelSource];
-  NSURL *tokenizerURL = [NSURL URLWithString:tokenizerSource];
-
-  if(self->runner || isFetching){
-    reject(@"model_already_loaded", @"Model and tokenizer already loaded", nil);
+  NSURL *modelURL = [NSURL URLWithString:modelSource];
+  NSURL *tokenizerURL = [NSURL URLWithString:tokenizerSource];
+
+  if(self->runner || isFetching){
+    reject(@"model_already_loaded", @"Model and tokenizer already loaded", nil);
+    return;
+  }
+
+  isFetching = YES;
+  [Fetcher fetchResource:tokenizerURL resourceType:ResourceType::TOKENIZER completionHandler:^(NSString *tokenizerFilePath, NSError *error) {
+    if(error){
+      reject(@"download_error", error.localizedDescription, nil);
       return;
     }
+    LargeFileFetcher *modelFetcher = [[LargeFileFetcher alloc] init];
+    modelFetcher.onProgress = ^(NSNumber *progress) {
+      [self updateDownloadProgress:progress];
+    };
 
-  isFetching = YES;
-  [Fetcher fetchResource:tokenizerURL resourceType:ResourceType::TOKENIZER completionHandler:^(NSString *tokenizerFilePath, NSError *error) {
-    if(error){
-      reject(@"download_error", error.localizedDescription, nil);
-      return;
-    }
-    LargeFileFetcher *modelFetcher = [[LargeFileFetcher alloc] init];
-    modelFetcher.onProgress = ^(NSNumber *progress) {
-      [self updateDownloadProgress:progress];
-    };
-
-    modelFetcher.onFailure = ^(NSError *error){
-      reject(@"download_error", error.localizedDescription, nil);
-      return;
-    };
-
-    modelFetcher.onFinish = ^(NSString *modelFilePath) {
-      self->runner = [[LLaMARunner alloc] initWithModelPath:modelFilePath tokenizerPath:tokenizerFilePath];
-      NSUInteger contextWindowLengthUInt = (NSUInteger)round(contextWindowLength);
-
-      self->conversationManager = [[ConversationManager alloc] initWithNumMessagesContextWindow: contextWindowLengthUInt systemPrompt: systemPrompt];
-      self->isFetching = NO;
-      resolve(@"Model and tokenizer loaded successfully");
-      return;
-    };
+    modelFetcher.onFailure = ^(NSError *error){
+      reject(@"download_error", error.localizedDescription, nil);
+      return;
+    };
+
+    modelFetcher.onFinish = ^(NSString *modelFilePath) {
+      self->runner = [[LLaMARunner alloc] initWithModelPath:modelFilePath tokenizerPath:tokenizerFilePath];
+      NSUInteger contextWindowLengthUInt = (NSUInteger)round(contextWindowLength);
 
-    [modelFetcher startDownloadingFileFromURL:modelURL];
-  }];
+      self->conversationManager = [[ConversationManager alloc] initWithNumMessagesContextWindow: contextWindowLengthUInt systemPrompt: systemPrompt];
+      self->isFetching = NO;
+      resolve(@"Model and tokenizer loaded successfully");
+      return;
+    };
+
+    [modelFetcher startDownloadingFileFromURL:modelURL];
+  }];
 }
 
 
 - (void) runInference:(NSString *)input resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
-  [conversationManager addResponse:input senderRole:ChatRole::USER];
-  NSString *prompt = [conversationManager getConversation];
-
-  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
-    NSError *error = nil;
-    [self->runner generate:prompt withTokenCallback:^(NSString *token) {
-      [self onResult:token prompt:prompt];
-    } error:&error];
-
-    // make sure to add eot token once generation is done
-    if (![self->tempLlamaResponse hasSuffix:END_OF_TEXT_TOKEN_NS]) {
-      [self onResult:END_OF_TEXT_TOKEN_NS prompt:prompt];
-    }
-
-    if (self->tempLlamaResponse) {
-      [self->conversationManager addResponse:self->tempLlamaResponse senderRole:ChatRole::ASSISTANT];
-      self->tempLlamaResponse = [NSMutableString string];
-    }
-
-    if (error) {
-      reject(@"error_in_generation", error.localizedDescription, nil);
-      return;
-    }
-    resolve(@"Inference completed successfully");
-    return;
-  });
+  [conversationManager addResponse:input senderRole:ChatRole::USER];
+  NSString *prompt = [conversationManager getConversation];
+
+  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+    NSError *error = nil;
+    [self->runner generate:prompt withTokenCallback:^(NSString *token) {
+      [self onResult:token prompt:prompt];
+    } error:&error];
+
+    // make sure to add eot token once generation is done
+    if (![self->tempLlamaResponse hasSuffix:END_OF_TEXT_TOKEN_NS]) {
+      [self onResult:END_OF_TEXT_TOKEN_NS prompt:prompt];
+    }
+
+    if (self->tempLlamaResponse) {
+      [self->conversationManager addResponse:self->tempLlamaResponse senderRole:ChatRole::ASSISTANT];
+      self->tempLlamaResponse = [NSMutableString string];
+    }
+
+    if (error) {
+      reject(@"error_in_generation", error.localizedDescription, nil);
+      return;
+    }
+    resolve(@"Inference completed successfully");
+    return;
+  });
 }
 
 
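For orientation, the hunk header above points at - (void)onResult:(NSString *)token prompt:(NSString *)prompt {, the callback that runInference feeds every generated token into. Its body is not part of this diff, but the surrounding code pins down its contract: tokens accumulate into tempLlamaResponse, and the buffer is reset after each turn. A minimal sketch of that accumulate-and-emit shape, assuming a hypothetical emitOnToken: emitter named by analogy with emitOnDownloadProgress: above (a sketch under those assumptions, not the method's actual implementation):

// Sketch only, not part of this diff. emitOnToken: is a hypothetical
// event emitter, named by analogy with emitOnDownloadProgress:.
- (void)onResult:(NSString *)token prompt:(NSString *)prompt {
  // Buffer the partial response; runInference later commits the
  // finished string to the ConversationManager and resets this buffer.
  [self->tempLlamaResponse appendString:token];

  // Forward the token to JS on the main queue, mirroring how
  // updateDownloadProgress dispatches its progress events.
  dispatch_async(dispatch_get_main_queue(), ^{
    [self emitOnToken:token];
  });
}

Whatever the real method does (it presumably also uses the prompt: parameter to filter out tokens echoed from the prompt itself), runInference relies only on those two behaviors: the buffer ends up holding the reply, terminated by END_OF_TEXT_TOKEN_NS, and it is emptied once the reply is stored.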