We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 207ebbc · commit e4647c7 — Copy full SHA for e4647c7
llama_cpp/server/__main__.py
@@ -29,9 +29,10 @@ class Settings(BaseSettings):
29
model: str
30
n_ctx: int = 2048
31
n_batch: int = 8
32
- n_threads: int = ((os.cpu_count() or 2) // 2) or 1
+ n_threads: int = max((os.cpu_count() or 2) // 2, 1)
33
f16_kv: bool = True
34
use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
35
+ use_mmap: bool = True
36
embedding: bool = True
37
last_n_tokens_size: int = 64
38
logits_all: bool = False
@@ -54,6 +55,7 @@ class Settings(BaseSettings):
54
55
settings.model,
56
f16_kv=settings.f16_kv,
57
use_mlock=settings.use_mlock,
58
+ use_mmap=settings.use_mmap,
59
embedding=settings.embedding,
60
logits_all=settings.logits_all,
61
n_threads=settings.n_threads,
0 commit comments