Commit 9116ae9

Change argument processing to allow prompt or file args. (#103)
1 parent 428aa70 commit 9116ae9

File tree

3 files changed: 38 additions, 26 deletions

  chat.cpp  (+23, -20)
  utils.cpp (+8)
  utils.h   (+7, -6)

chat.cpp (+23, -20)
@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     fin.close();
 
     std::vector<uint8_t> tmp;
-
+
     for (int i = 0; i < n_parts; ++i) {
         const int part_id = i;
         //const int part_id = n_parts - i - 1;
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    params.temp = 0.1f;
-    params.top_p = 0.95f;
-    params.n_ctx = 2048;
-    params.interactive = true;
-    params.interactive_start = true;
-    params.use_color = true;
-    params.model = "ggml-alpaca-7b-q4.bin";
-
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     // params.prompt.insert(0, 1, ' ');
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp;
 
     // params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
     // // tokenize the reverse prompt
     // std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);
 
+
+    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
+    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
+    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
+    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
+
+    if(!params.prompt.empty()) {
+        std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
+        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
+        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
+        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
+    }
+
     // fprintf(stderr, "\n");
     // fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     // fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
     // }
     // fprintf(stderr, "\n");
 
-    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
-    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
-    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
-
-    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
-
-
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {
 
         // end of text token
         if (embd.back() == 2) {
-            // fprintf(stderr, " [end of text]\n");
-            is_interacting = true;
-            continue;
+            if (params.interactive) {
+                is_interacting = true;
+                continue;
+            } else {
+                printf("\n");
+                fprintf(stderr, " [end of text]\n");
+                break;
+            }
         }
     }

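The chat.cpp hunks above move the prompt construction ahead of the interactive-mode setup and make it conditional: the Alpaca preamble is always tokenized and queued, but the "### Instruction:" / "### Response:" scaffold and the user text are only appended when a prompt was actually supplied via -p or -f. Below is a minimal standalone sketch of that flow; fake_tokenize and token_id are hypothetical stand-ins for the repo's ::llama_tokenize and gpt_vocab::id, returning placeholder ids so the logic can run without a model.

// Standalone sketch of the new prompt assembly in chat.cpp.
#include <cstdio>
#include <string>
#include <vector>

using token_id = int;

// Hypothetical stand-in for ::llama_tokenize(vocab, text, add_bos).
static std::vector<token_id> fake_tokenize(const std::string & text, bool /*add_bos*/) {
    return std::vector<token_id>(text.size() / 4 + 1, 0); // placeholder ids
}

int main() {
    std::string user_prompt = "Explain quicksort briefly."; // would come from -p or -f

    std::vector<token_id> embd_inp;

    // The instruction preamble is always queued first.
    std::vector<token_id> instruct_inp = fake_tokenize(
        " Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n", true);
    std::vector<token_id> prompt_inp   = fake_tokenize("### Instruction:\n\n", true);
    std::vector<token_id> response_inp = fake_tokenize("### Response:\n\n", false);

    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());

    // The Instruction/Response scaffold plus the user text are only appended
    // when a prompt was supplied on the command line.
    if (!user_prompt.empty()) {
        std::vector<token_id> param_inp = fake_tokenize(user_prompt, true);
        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
    }

    std::printf("queued %zu prompt tokens\n", embd_inp.size());
    return 0;
}

The last hunk pairs with this: in a non-interactive run, the end-of-text token (id 2) now prints "[end of text]" and breaks out of the generation loop instead of handing control back to the user.
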
utils.cpp (+8)
@@ -24,9 +24,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         } else if (arg == "-t" || arg == "--threads") {
             params.n_threads = std::stoi(argv[++i]);
         } else if (arg == "-p" || arg == "--prompt") {
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             params.prompt = argv[++i];
         } else if (arg == "-f" || arg == "--file") {
 
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             std::ifstream file(argv[++i]);
 
             std::copy(std::istreambuf_iterator<char>(file),

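Both new branches in utils.cpp do the same thing: seeing -p/--prompt or -f/--file switches the run from chat mode to a one-shot, uncolored run before the prompt text is stored. A compilable sketch of that behaviour follows, using a trimmed stand-in for gpt_params (params_sketch, handle_prompt_arg and handle_file_arg are illustrative names, not part of the repo).

// Sketch of the new -p / -f handling in gpt_params_parse: a prompt or file
// argument implies non-interactive, plain-text output.
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iterator>
#include <string>

struct params_sketch {            // trimmed stand-in for gpt_params
    bool interactive       = true;
    bool interactive_start = true;
    bool use_color         = true;
    std::string prompt;
};

static void one_shot(params_sketch & p) {
    p.interactive       = false;  // -p / -f turn chat mode off
    p.interactive_start = false;
    p.use_color         = false;
}

static void handle_prompt_arg(params_sketch & p, const std::string & text) {
    one_shot(p);
    p.prompt = text;
}

static void handle_file_arg(params_sketch & p, const std::string & path) {
    one_shot(p);
    std::ifstream file(path);     // the whole file becomes the prompt
    std::copy(std::istreambuf_iterator<char>(file),
              std::istreambuf_iterator<char>(),
              std::back_inserter(p.prompt));
}

int main(int argc, char ** argv) {
    params_sketch params;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if ((arg == "-p" || arg == "--prompt") && i + 1 < argc) {
            handle_prompt_arg(params, argv[++i]);
        } else if ((arg == "-f" || arg == "--file") && i + 1 < argc) {
            handle_file_arg(params, argv[++i]);
        }
    }
    std::printf("interactive=%d prompt=\"%s\"\n", params.interactive, params.prompt.c_str());
    return 0;
}

Run as, say, ./sketch -p "Write a haiku" or ./sketch -f prompt.txt; either form leaves interactive set to false, mirroring the new behaviour in utils.cpp.
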
utils.h (+7, -6)
@@ -12,28 +12,29 @@
 // CLI argument parsing
 //
 
+// The default parameters
 struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 128; // new tokens to predict
     int32_t repeat_last_n = 64; // last n tokens to penalize
-    int32_t n_ctx = 512; //context size
+    int32_t n_ctx = 2048; //context size
 
     // sampling parameters
     int32_t top_k = 40;
     float top_p = 0.95f;
-    float temp = 0.80f;
+    float temp = 0.10f;
     float repeat_penalty = 1.30f;
 
     int32_t n_batch = 8; // batch size for prompt processing
 
-    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+    std::string model = "ggml-alpaca-7b-q4.bin"; // model path
     std::string prompt;
 
-    bool use_color = false; // use color to distinguish generations and inputs
+    bool use_color = true; // use color to distinguish generations and inputs
 
-    bool interactive = false; // interactive mode
-    bool interactive_start = false; // reverse prompt immediately
+    bool interactive = true; // interactive mode
+    bool interactive_start = true; // reverse prompt immediately
     std::string antiprompt = ""; // string upon seeing which more user input is prompted
 };

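With the utils.h change, the Alpaca-flavoured defaults live in gpt_params itself rather than being overwritten in chat.cpp's main(), so any caller gets them simply by value-initializing the struct. A tiny illustration, using a trimmed, hypothetical copy of the struct:

// Sketch: defaults come straight from the struct's in-class initializers.
#include <cstdint>
#include <cstdio>
#include <string>

struct gpt_params_sketch {                        // trimmed copy, illustration only
    int32_t n_ctx = 2048;                         // context size
    float   temp  = 0.10f;                        // sampling temperature
    std::string model = "ggml-alpaca-7b-q4.bin";  // model path
    bool interactive = true;                      // chat mode unless -p/-f is given
};

int main() {
    gpt_params_sketch params;  // no extra setup code needed
    std::printf("model=%s n_ctx=%d temp=%.2f interactive=%d\n",
                params.model.c_str(), (int) params.n_ctx, params.temp, params.interactive);
    return 0;
}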