Commit dccb148

feat: Update llama.cpp
1 parent a14b49d

File tree

2 files changed: +66, -27 lines

llama_cpp/llama_cpp.py (+65, -26)
@@ -233,9 +233,6 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # define LLAMA_DEFAULT_SEED 0xFFFFFFFF
 LLAMA_DEFAULT_SEED = 0xFFFFFFFF
 
-# define LLAMA_MAX_RNG_STATE (64*1024)
-LLAMA_MAX_RNG_STATE = 64 * 1024
-
 # define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 LLAMA_FILE_MAGIC_GGLA = 0x67676C61
 
@@ -247,13 +244,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 
 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 7
-LLAMA_SESSION_VERSION = 7
+# define LLAMA_SESSION_VERSION 8
+LLAMA_SESSION_VERSION = 8
 
 # define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
 LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
-# define LLAMA_STATE_SEQ_VERSION 1
-LLAMA_STATE_SEQ_VERSION = 1
+# define LLAMA_STATE_SEQ_VERSION 2
+LLAMA_STATE_SEQ_VERSION = 2
 
 # struct llama_model;
 llama_model_p = NewType("llama_model_p", int)
@@ -1583,7 +1580,7 @@ def llama_lora_adapter_set(
     ...
 
 
-# // Remove a LoRA adapter from given context
+# // Remove a specific LoRA adapter from given context
 # // Return -1 if the adapter is not present in the context
 # LLAMA_API int32_t llama_lora_adapter_remove(
 #         struct llama_context * ctx,
@@ -1601,6 +1598,19 @@ def llama_lora_adapter_remove(
     ...
 
 
+# // Remove all LoRA adapters from given context
+# LLAMA_API void llama_lora_adapter_clear(
+#         struct llama_context * ctx);
+@ctypes_function(
+    "llama_lora_adapter_clear",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_lora_adapter_clear(ctx: llama_context_p, /):
+    """Remove all LoRA adapters from given context"""
+    ...
+
+
 # // Manually free a LoRA adapter
 # // Note: loaded adapters will be free when the associated model is deleted
 # LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
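For orientation, a minimal sketch of how the new binding might be called; it assumes `ctx` is an initialized llama_context_p and that adapters were previously attached with llama_lora_adapter_set (the setup described in the comments is illustrative, not part of this diff):

import llama_cpp

# Illustrative: assumes `ctx` came from llama_new_context_with_model and one
# or more adapters were attached via llama_lora_adapter_set(ctx, adapter, scale).
# One call now detaches all of them, instead of removing each adapter
# individually with llama_lora_adapter_remove:
llama_cpp.llama_lora_adapter_clear(ctx)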
@@ -1992,17 +2002,17 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 # //
 
 
-# Returns the maximum size in bytes of the state (rng, logits, embedding
-# and kv_cache) - will often be smaller after compacting tokens
-# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+# // Returns the *actual* size in bytes of the state
+# // (rng, logits, embedding and kv_cache)
+# // Only use when saving the state, not when restoring it, otherwise the size may be too small.
+# LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
-    """Returns the maximum size in bytes of the state (rng, logits, embedding
-    and kv_cache) - will often be smaller after compacting tokens"""
+    """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
     ...
 
 
-# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+# LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
 #                      "use llama_state_get_size instead");
 @ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
@@ -2011,22 +2021,27 @@ def llama_get_state_size(ctx: llama_context_p, /) -> int:
     ...
 
 
-# Copies the state to the specified destination address.
-# Destination needs to have allocated enough memory.
-# Returns the number of bytes copied
+# // Copies the state to the specified destination address.
+# // Destination needs to have allocated enough memory.
+# // Returns the number of bytes copied
 # LLAMA_API size_t llama_state_get_data(
 #         struct llama_context * ctx,
-#                        uint8_t * dst);
+#                        uint8_t * dst,
+#                         size_t   size);
 @ctypes_function(
     "llama_state_get_data",
     [
         llama_context_p_ctypes,
         ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
     ],
     ctypes.c_size_t,
 )
 def llama_state_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
@@ -2059,14 +2074,18 @@ def llama_copy_state_data(
 # // Returns the number of bytes read
 # LLAMA_API size_t llama_state_set_data(
 #         struct llama_context * ctx,
-#                  const uint8_t * src);
+#                  const uint8_t * src,
+#                         size_t   size);
 @ctypes_function(
     "llama_state_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
+    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t],
     ctypes.c_size_t,
 )
 def llama_state_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
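Taken together with llama_state_get_size above, the new `size` arguments mean callers hand over an explicitly sized buffer rather than a bare pointer. A minimal save/restore sketch against these bindings, assuming `ctx` is an initialized llama_context_p (the buffer handling is illustrative, not part of this diff):

import ctypes
import llama_cpp

# Size the buffer from the current state; per the updated header comment,
# only use this value when saving, not when restoring.
state_size = llama_cpp.llama_state_get_size(ctx)

# Copy the full state (rng, logits, embedding, kv_cache) out of the context.
buf = (ctypes.c_uint8 * state_size)()
n_written = llama_cpp.llama_state_get_data(ctx, buf, state_size)

# Later: restore the state. The call reads at most `size` bytes from `buf`
# and returns the number of bytes actually consumed.
n_read = llama_cpp.llama_state_set_data(ctx, buf, n_written)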
@@ -2216,14 +2235,24 @@ def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> int:
 # LLAMA_API size_t llama_state_seq_get_data(
 #         struct llama_context * ctx,
 #                        uint8_t * dst,
+#                         size_t   size,
 #                   llama_seq_id   seq_id);
 @ctypes_function(
     "llama_state_seq_get_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the KV cache of a single sequence into the specified buffer"""
     ...
@@ -2236,14 +2265,24 @@ def llama_state_seq_get_data(
 # LLAMA_API size_t llama_state_seq_set_data(
 #         struct llama_context * ctx,
 #                  const uint8_t * src,
+#                         size_t   size,
 #                   llama_seq_id   dest_seq_id);
 @ctypes_function(
     "llama_state_seq_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    dest_seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
     ...
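The per-sequence variants follow the same pattern and can be used, for example, to fork one conversation into another sequence. A sketch under the same assumptions (initialized `ctx`; the sequence ids are illustrative):

import ctypes
import llama_cpp

src_seq, dst_seq = 0, 1  # illustrative sequence ids

# Size a buffer for one sequence's state and copy that sequence out ...
seq_size = llama_cpp.llama_state_seq_get_size(ctx, src_seq)
buf = (ctypes.c_uint8 * seq_size)()
n = llama_cpp.llama_state_seq_get_data(ctx, buf, seq_size, src_seq)

# ... then replay it into the destination sequence.
llama_cpp.llama_state_seq_set_data(ctx, buf, n, dst_seq)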

vendor/llama.cpp (+1, -1: submodule pointer update)
