CHANGELOG.md: 15 additions & 0 deletions
@@ -5,52 +5,67 @@
## [2025-10-26]

### llama-index-core [0.14.6]

- Add allow_parallel_tool_calls for non-streaming ([#20117](https://github.com/run-llama/llama_index/pull/20117))
- Fix invalid use of field-specific metadata ([#20122](https://github.com/run-llama/llama_index/pull/20122))
- update doc for SemanticSplitterNodeParser ([#20125](https://github.com/run-llama/llama_index/pull/20125))
- fix rare cases when sentence splits are larger than chunk size ([#20147](https://github.com/run-llama/llama_index/pull/20147))

### llama-index-embeddings-bedrock [0.7.0]

- Fix BedrockEmbedding to support Cohere v4 response format ([#20094](https://github.com/run-llama/llama_index/pull/20094))

### llama-index-embeddings-isaacus [0.1.0]

- feat: Isaacus embeddings integration ([#20124](https://github.com/run-llama/llama_index/pull/20124))

### llama-index-embeddings-oci-genai [0.4.2]

- Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146))

### llama-index-llms-anthropic [0.9.7]

- Fix double token stream in anthropic llm ([#20108](https://github.com/run-llama/llama_index/pull/20108))
- Ensure anthropic content delta only has user facing response ([#20113](https://github.com/run-llama/llama_index/pull/20113))

### llama-index-llms-baseten [0.1.7]

- add GLM ([#20121](https://github.com/run-llama/llama_index/pull/20121))

### llama-index-llms-helicone [0.1.0]

- integrate helicone to llama-index ([#20131](https://github.com/run-llama/llama_index/pull/20131))

### llama-index-llms-oci-genai [0.6.4]

- Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146))

### llama-index-llms-openai [0.6.5]

- chore: openai vbump ([#20095](https://github.com/run-llama/llama_index/pull/20095))

### llama-index-readers-imdb-review [0.4.2]

- chore: Update selenium dependency in imdb-review reader ([#20105](https://github.com/run-llama/llama_index/pull/20105))

### llama-index-retrievers-bedrock [0.5.0]

- feat(bedrock): add async support for AmazonKnowledgeBasesRetriever ([#20114](https://github.com/run-llama/llama_index/pull/20114))

### llama-index-retrievers-superlinked [0.1.3]

- Update README.md ([#19829](https://github.com/run-llama/llama_index/pull/19829))

### llama-index-storage-kvstore-postgres [0.4.2]

- fix: Replace raw SQL string interpolation with proper SQLAlchemy parameterized APIs in PostgresKVStore ([#20104](https://github.com/run-llama/llama_index/pull/20104))

### llama-index-tools-mcp [0.4.3]

- Fix BasicMCPClient resource signatures ([#20118](https://github.com/run-llama/llama_index/pull/20118))

### llama-index-vector-stores-postgres [0.7.1]

- Add GIN index support for text array metadata in PostgreSQL vector store ([#20130](https://github.com/run-llama/llama_index/pull/20130))

## [2025-10-15]
@@ -16,6 +16,7 @@
Type,
Union,
Callable,
+ Literal,
)


@@ -139,9 +140,9 @@ class GoogleGenAI(FunctionCallingLLM):
default=None,
description="Google GenAI tool to use for the model to augment responses.",
)
- use_file_api: bool = Field(
- default=True,
- description="Whether or not to use the FileAPI for large files (>20MB).",
+ file_mode: Literal["inline", "fileapi", "hybrid"] = Field(
+ default="hybrid",
+ description="Whether to use inline-only, FileAPI-only or both for handling files.",
)

_max_tokens: int = PrivateAttr()
@@ -165,7 +166,7 @@ def __init__(
is_function_calling_model: bool = True,
cached_content: Optional[str] = None,
built_in_tool: Optional[types.Tool] = None,
- use_file_api: bool = True,
+ file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid",
**kwargs: Any,
):
# API keys are optional. The API can be authorised via OAuth (detected
@@ -214,7 +215,7 @@ def __init__(
max_retries=max_retries,
cached_content=cached_content,
built_in_tool=built_in_tool,
- use_file_api=use_file_api,
+ file_mode=file_mode,
**kwargs,
)

@@ -307,20 +308,17 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any):
**kwargs.pop("generation_config", {}),
}
params = {**kwargs, "generation_config": generation_config}
- next_msg, chat_kwargs = asyncio.run(
+ next_msg, chat_kwargs, file_api_names = asyncio.run(
prepare_chat_params(
- self.model, messages, self.use_file_api, self._client, **params
+ self.model, messages, self.file_mode, self._client, **params
)
)
chat = self._client.chats.create(**chat_kwargs)
response = chat.send_message(
next_msg.parts if isinstance(next_msg, types.Content) else next_msg
)

- if self.use_file_api:
- asyncio.run(
- delete_uploaded_files([*chat_kwargs["history"], next_msg], self._client)
- )
+ asyncio.run(delete_uploaded_files(file_api_names, self._client))

return chat_from_gemini_response(response)

@@ -331,18 +329,15 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any):
**kwargs.pop("generation_config", {}),
}
params = {**kwargs, "generation_config": generation_config}
- next_msg, chat_kwargs = await prepare_chat_params(
- self.model, messages, self.use_file_api, self._client, **params
+ next_msg, chat_kwargs, file_api_names = await prepare_chat_params(
+ self.model, messages, self.file_mode, self._client, **params
)
chat = self._client.aio.chats.create(**chat_kwargs)
response = await chat.send_message(
next_msg.parts if isinstance(next_msg, types.Content) else next_msg
)

- if self.use_file_api:
- await delete_uploaded_files(
- [*chat_kwargs["history"], next_msg], self._client
- )
+ await delete_uploaded_files(file_api_names, self._client)

return chat_from_gemini_response(response)

@@ -364,9 +359,9 @@ def _stream_chat(
**kwargs.pop("generation_config", {}),
}
params = {**kwargs, "generation_config": generation_config}
- next_msg, chat_kwargs = asyncio.run(
+ next_msg, chat_kwargs, file_api_names = asyncio.run(
prepare_chat_params(
- self.model, messages, self.use_file_api, self._client, **params
+ self.model, messages, self.file_mode, self._client, **params
)
)
chat = self._client.chats.create(**chat_kwargs)
@@ -399,12 +394,8 @@ def gen() -> ChatResponseGen:
llama_resp.message.additional_kwargs["tool_calls"] = existing_tool_calls
yield llama_resp

- if self.use_file_api:
- asyncio.run(
- delete_uploaded_files(
- [*chat_kwargs["history"], next_msg], self._client
- )
- )
+ if self.file_mode in ("fileapi", "hybrid"):
+ asyncio.run(delete_uploaded_files(file_api_names, self._client))

return gen()

@@ -422,8 +413,8 @@ async def _astream_chat(
**kwargs.pop("generation_config", {}),
}
params = {**kwargs, "generation_config": generation_config}
- next_msg, chat_kwargs = await prepare_chat_params(
- self.model, messages, self.use_file_api, self._client, **params
+ next_msg, chat_kwargs, file_api_names = await prepare_chat_params(
+ self.model, messages, self.file_mode, self._client, **params
)
chat = self._client.aio.chats.create(**chat_kwargs)

@@ -463,10 +454,7 @@ async def gen() -> ChatResponseAsyncGen:
)
yield llama_resp

- if self.use_file_api:
- await delete_uploaded_files(
- [*chat_kwargs["history"], next_msg], self._client
- )
+ await delete_uploaded_files(file_api_names, self._client)

return gen()

@@ -585,12 +573,13 @@ def structured_predict_without_function_calling(
llm_kwargs = llm_kwargs or {}

messages = prompt.format_messages(**prompt_args)
- contents = [
- asyncio.run(
- chat_message_to_gemini(message, self.use_file_api, self._client)
- )
+ contents_and_names = [
+ asyncio.run(chat_message_to_gemini(message, self.file_mode, self._client))
for message in messages
]
+ contents = [it[0] for it in contents_and_names]
+ file_api_names = [name for it in contents_and_names for name in it[1]]

response = self._client.models.generate_content(
model=self.model,
contents=contents,
@@ -605,8 +594,7 @@ def structured_predict_without_function_calling(
},
)

- if self.use_file_api:
- asyncio.run(delete_uploaded_files(contents, self._client))
+ asyncio.run(delete_uploaded_files(file_api_names, self._client))

if isinstance(response.parsed, BaseModel):
return response.parsed
@@ -635,20 +623,22 @@ def structured_predict(
generation_config["response_schema"] = output_cls

messages = prompt.format_messages(**prompt_args)
- contents = [
+ contents_and_names = [
asyncio.run(
- chat_message_to_gemini(message, self.use_file_api, self._client)
+ chat_message_to_gemini(message, self.file_mode, self._client)
)
for message in messages
]
+ contents = [it[0] for it in contents_and_names]
+ file_api_names = [name for it in contents_and_names for name in it[1]]

response = self._client.models.generate_content(
model=self.model,
contents=contents,
config=generation_config,
)

- if self.use_file_api:
- asyncio.run(delete_uploaded_files(contents, self._client))
+ asyncio.run(delete_uploaded_files(file_api_names, self._client))

if isinstance(response.parsed, BaseModel):
return response.parsed
@@ -682,20 +672,22 @@ async def astructured_predict(
generation_config["response_schema"] = output_cls

messages = prompt.format_messages(**prompt_args)
- contents = await asyncio.gather(
+ contents_and_names = await asyncio.gather(
*[
- chat_message_to_gemini(message, self.use_file_api, self._client)
+ chat_message_to_gemini(message, self.file_mode, self._client)
for message in messages
]
)
+ contents = [it[0] for it in contents_and_names]
+ file_api_names = [name for it in contents_and_names for name in it[1]]

response = await self._client.aio.models.generate_content(
model=self.model,
contents=contents,
config=generation_config,
)

- if self.use_file_api:
- await delete_uploaded_files(contents, self._client)
+ await delete_uploaded_files(file_api_names, self._client)

if isinstance(response.parsed, BaseModel):
return response.parsed
@@ -729,12 +721,14 @@ def stream_structured_predict(
generation_config["response_schema"] = output_cls

messages = prompt.format_messages(**prompt_args)
- contents = [
+ contents_and_names = [
asyncio.run(
- chat_message_to_gemini(message, self.use_file_api, self._client)
+ chat_message_to_gemini(message, self.file_mode, self._client)
)
for message in messages
]
+ contents = [it[0] for it in contents_and_names]
+ file_api_names = [name for it in contents_and_names for name in it[1]]

def gen() -> Generator[Union[Model, FlexibleModel], None, None]:
flexible_model = create_flexible_model(output_cls)
@@ -758,8 +752,7 @@ def gen() -> Generator[Union[Model, FlexibleModel], None, None]:
if streaming_model:
yield streaming_model

- if self.use_file_api:
- asyncio.run(delete_uploaded_files(contents, self._client))
+ asyncio.run(delete_uploaded_files(file_api_names, self._client))

return gen()
else:
@@ -789,12 +782,14 @@ async def astream_structured_predict(
generation_config["response_schema"] = output_cls

messages = prompt.format_messages(**prompt_args)
- contents = await asyncio.gather(
+ contents_and_names = await asyncio.gather(
*[
- chat_message_to_gemini(message, self.use_file_api, self._client)
+ chat_message_to_gemini(message, self.file_mode, self._client)
for message in messages
]
)
+ contents = [it[0] for it in contents_and_names]
+ file_api_names = [name for it in contents_and_names for name in it[1]]

async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]:
flexible_model = create_flexible_model(output_cls)
@@ -818,8 +813,7 @@ async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]:
if streaming_model:
yield streaming_model

- if self.use_file_api:
- await delete_uploaded_files(contents, self._client)
+ await delete_uploaded_files(file_api_names, self._client)

return gen()
else:
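For readers skimming the diff, here is a minimal usage sketch of the new `file_mode` option, which replaces the old `use_file_api` flag. The import paths, model id, and message content are illustrative assumptions; only the `file_mode` values and its `"hybrid"` default come from the diff above.

```python
# Minimal sketch, not part of the PR. Assumes the usual
# llama-index-llms-google-genai import path and an example model id.
from llama_index.core.llms import ChatMessage
from llama_index.llms.google_genai import GoogleGenAI

llm = GoogleGenAI(
    model="gemini-2.0-flash",  # illustrative model id, not taken from the diff
    # Per the new field description: "inline" handles files inline only,
    # "fileapi" uses the File API only, "hybrid" (the default) uses both.
    file_mode="hybrid",
)

response = llm.chat([ChatMessage(role="user", content="Hello!")])
print(response.message.content)
```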
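And a rough sketch of the refactored cleanup flow implied by the changed call sites: `prepare_chat_params` now returns the uploaded File API resource names as a third value, and callers pass that list to `delete_uploaded_files` instead of rescanning the chat history. The helper import location is an assumption; signatures are read from this diff, not verified against the released package.

```python
# Sketch of the new async chat flow as read from the diff above.
from google.genai import types
from llama_index.llms.google_genai.utils import (  # assumed module path
    chat_from_gemini_response,
    delete_uploaded_files,
    prepare_chat_params,
)


async def chat_once(llm, messages, **params):
    # Third return value: names of files uploaded via the File API for this call.
    next_msg, chat_kwargs, file_api_names = await prepare_chat_params(
        llm.model, messages, llm.file_mode, llm._client, **params
    )
    chat = llm._client.aio.chats.create(**chat_kwargs)
    response = await chat.send_message(
        next_msg.parts if isinstance(next_msg, types.Content) else next_msg
    )
    # Delete exactly what was uploaded (an empty list when nothing used the File API).
    await delete_uploaded_files(file_api_names, llm._client)
    return chat_from_gemini_response(response)
```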