@@ -233,9 +233,6 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # define LLAMA_DEFAULT_SEED 0xFFFFFFFF
 LLAMA_DEFAULT_SEED = 0xFFFFFFFF

-# define LLAMA_MAX_RNG_STATE (64*1024)
-LLAMA_MAX_RNG_STATE = 64 * 1024
-
 # define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 LLAMA_FILE_MAGIC_GGLA = 0x67676C61

@@ -247,13 +244,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:

 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 7
-LLAMA_SESSION_VERSION = 7
+# define LLAMA_SESSION_VERSION 8
+LLAMA_SESSION_VERSION = 8

 # define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
 LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
-# define LLAMA_STATE_SEQ_VERSION 1
-LLAMA_STATE_SEQ_VERSION = 1
+# define LLAMA_STATE_SEQ_VERSION 2
+LLAMA_STATE_SEQ_VERSION = 2

 # struct llama_model;
 llama_model_p = NewType("llama_model_p", int)
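
Note for downstream users: since `LLAMA_SESSION_VERSION` and `LLAMA_STATE_SEQ_VERSION` are bumped here, state files written before this change will no longer load. A minimal pre-flight check sketch; the helper name `session_file_is_current` is hypothetical, and it assumes llama.cpp's session header layout of magic followed by version, each a little-endian uint32:

```python
import struct

def session_file_is_current(path: str) -> bool:
    # Read the 8-byte header: magic then version, both little-endian uint32
    # (assumed layout, matching how llama.cpp writes session files).
    with open(path, "rb") as f:
        header = f.read(8)
    if len(header) < 8:
        return False
    magic, version = struct.unpack("<II", header)
    return magic == LLAMA_SESSION_MAGIC and version == LLAMA_SESSION_VERSION
```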
@@ -1583,7 +1580,7 @@ def llama_lora_adapter_set(
     ...


-# // Remove a LoRA adapter from given context
+# // Remove a specific LoRA adapter from given context
 # // Return -1 if the adapter is not present in the context
 # LLAMA_API int32_t llama_lora_adapter_remove(
 #         struct llama_context * ctx,
@@ -1601,6 +1598,19 @@ def llama_lora_adapter_remove(
     ...


+# // Remove all LoRA adapters from given context
+# LLAMA_API void llama_lora_adapter_clear(
+#         struct llama_context * ctx);
+@ctypes_function(
+    "llama_lora_adapter_clear",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_lora_adapter_clear(ctx: llama_context_p, /):
+    """Remove all LoRA adapters from given context"""
+    ...
+
+
 # // Manually free a LoRA adapter
 # // Note: loaded adapters will be free when the associated model is deleted
 # LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
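
The new `llama_lora_adapter_clear` binding lets callers hot-swap adapters without tracking which ones are currently set. A hedged usage sketch; `ctx` and `adapter` are assumed to come from `llama_new_context_with_model` and `llama_lora_adapter_init` respectively:

```python
# Drop every active adapter, then activate a single new one.
llama_lora_adapter_clear(ctx)
# llama_lora_adapter_set returns an int32; treat non-zero as failure here.
if llama_lora_adapter_set(ctx, adapter, 1.0) != 0:
    raise RuntimeError("adapter could not be applied to this context")
```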
@@ -1992,17 +2002,17 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 # //


-# Returns the maximum size in bytes of the state (rng, logits, embedding
-# and kv_cache) - will often be smaller after compacting tokens
-# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+# // Returns the *actual* size in bytes of the state
+# // (rng, logits, embedding and kv_cache)
+# // Only use when saving the state, not when restoring it, otherwise the size may be too small.
+# LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
-    """Returns the maximum size in bytes of the state (rng, logits, embedding
-    and kv_cache) - will often be smaller after compacting tokens"""
+    """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
     ...


-# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+# LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
 #     "use llama_state_get_size instead");
 @ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
@@ -2011,22 +2021,27 @@ def llama_get_state_size(ctx: llama_context_p, /) -> int:
     ...


-# Copies the state to the specified destination address.
-# Destination needs to have allocated enough memory.
-# Returns the number of bytes copied
+# // Copies the state to the specified destination address.
+# // Destination needs to have allocated enough memory.
+# // Returns the number of bytes copied
 # LLAMA_API size_t llama_state_get_data(
 #         struct llama_context * ctx,
-#         uint8_t * dst);
+#         uint8_t * dst,
+#         size_t size);
 @ctypes_function(
     "llama_state_get_data",
     [
         llama_context_p_ctypes,
         ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
     ],
     ctypes.c_size_t,
 )
 def llama_state_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
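
With the added `size` argument, saving state becomes a query-allocate-copy sequence, so the library can bounds-check the write. A minimal sketch, assuming an existing `llama_context_p` named `ctx`:

```python
import ctypes

state_size = llama_state_get_size(ctx)   # *actual* size; query at save time
buf = (ctypes.c_uint8 * state_size)()    # destination buffer
n_written = llama_state_get_data(ctx, buf, state_size)
state_bytes = bytes(buf[:n_written])     # e.g. to persist to disk
```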
@@ -2059,14 +2074,18 @@ def llama_copy_state_data(
 # // Returns the number of bytes read
 # LLAMA_API size_t llama_state_set_data(
 #         struct llama_context * ctx,
-#         const uint8_t * src);
+#         const uint8_t * src,
+#         size_t size);
 @ctypes_function(
     "llama_state_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
+    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t],
     ctypes.c_size_t,
 )
 def llama_state_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
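
Restoring mirrors the save path: the caller passes the buffer length so `llama_state_set_data` will not read past the end. A sketch under the same assumptions, with `state_bytes` produced as in the save sketch above:

```python
src = (ctypes.c_uint8 * len(state_bytes)).from_buffer_copy(state_bytes)
n_read = llama_state_set_data(ctx, src, len(state_bytes))
# n_read is the number of bytes consumed from the buffer
```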
@@ -2216,14 +2235,24 @@ def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> int:
 # LLAMA_API size_t llama_state_seq_get_data(
 #         struct llama_context * ctx,
 #         uint8_t * dst,
+#         size_t size,
 #         llama_seq_id seq_id);
 @ctypes_function(
     "llama_state_seq_get_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the KV cache of a single sequence into the specified buffer"""
     ...
@@ -2236,14 +2265,24 @@ def llama_state_seq_get_data(
 # LLAMA_API size_t llama_state_seq_set_data(
 #         struct llama_context * ctx,
 #         const uint8_t * src,
+#         size_t size,
 #         llama_seq_id dest_seq_id);
 @ctypes_function(
     "llama_state_seq_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    dest_seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
     ...
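
The per-sequence variants follow the same pattern, with the new `size` argument inserted before the sequence id. A hedged round-trip sketch that copies the KV cache of sequence 0 into sequence 1, assuming `ctx` holds a populated cache:

```python
seq_size = llama_state_seq_get_size(ctx, 0)
seq_buf = (ctypes.c_uint8 * seq_size)()
n = llama_state_seq_get_data(ctx, seq_buf, seq_size, 0)  # source seq_id = 0
llama_state_seq_set_data(ctx, seq_buf, n, 1)             # dest_seq_id = 1
```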