Skip to content

Commit abf312d

Browse files
authored
Display context chunks in ask and search results (#149)
* Printing querying time. * Adding source name to chunks: add the source name as metadata to chunks, then print the sources when searching. * Printing the context provided to the LLM: to check the data transmitted to the LLM, display the relevance, ID, content, and source of each sent chunk. * Correcting source as metadata for chunks. * Applying ruff format. * Applying Ruff formatting. * Ruff formatting.
1 parent ab251ab commit abf312d

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

packages/leann-core/src/leann/api.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,17 @@ def ask(
12361236
"Please provide the best answer you can based on this context and your knowledge."
12371237
)
12381238

1239+
print("The context provided to the LLM is:")
1240+
print(f"{'Relevance':<10} | {'Chunk id':<10} | {'Content':<60} | {'Source':<80}")
1241+
print("-" * 150)
1242+
for r in results:
1243+
chunk_relevance = f"{r.score:.3f}"
1244+
chunk_id = r.id
1245+
chunk_content = r.text[:60]
1246+
chunk_source = r.metadata.get("source", "")[:80]
1247+
print(
1248+
f"{chunk_relevance:<10} | {chunk_id:<10} | {chunk_content:<60} | {chunk_source:<80}"
1249+
)
12391250
ask_time = time.time()
12401251
ans = self.llm.ask(prompt, **llm_kwargs)
12411252
ask_time = time.time() - ask_time

packages/leann-core/src/leann/cli.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import asyncio
3+
import time
34
from pathlib import Path
45
from typing import Any, Optional, Union
56

@@ -1186,6 +1187,7 @@ def file_filter(
11861187
for doc in other_docs:
11871188
file_path = doc.metadata.get("file_path", "")
11881189
if file_filter(file_path):
1190+
doc.metadata["source"] = file_path
11891191
filtered_docs.append(doc)
11901192

11911193
documents.extend(filtered_docs)
@@ -1290,7 +1292,10 @@ def file_filter(
12901292
nodes = parser.get_nodes_from_documents([doc])
12911293

12921294
for node in nodes:
1293-
all_texts.append(node.get_content())
1295+
text_with_source = (
1296+
"Chunk source:" + source_path + "\n" + node.get_content().replace("\n", " ")
1297+
)
1298+
all_texts.append(text_with_source)
12941299

12951300
print(f"Loaded {len(documents)} documents, {len(all_texts)} chunks")
12961301
return all_texts
@@ -1388,8 +1393,10 @@ async def build_index(self, args):
13881393
num_threads=args.num_threads,
13891394
)
13901395

1391-
for chunk_text in all_texts:
1392-
builder.add_text(chunk_text)
1396+
for chunk_text_with_source in all_texts:
1397+
chunk_source = chunk_text_with_source.split("\n")[0].split(":")[1]
1398+
chunk_text = chunk_text_with_source.split("\n")[1]
1399+
builder.add_text(chunk_text, {"source": chunk_source})
13931400

13941401
builder.build_index(index_path)
13951402
print(f"Index built at {index_path}")
@@ -1511,6 +1518,7 @@ async def search_documents(self, args):
15111518
for i, result in enumerate(results, 1):
15121519
print(f"{i}. Score: {result.score:.3f}")
15131520
print(f" {result.text[:200]}...")
1521+
print(f" Source: {result.metadata.get('source', '')}")
15141522
print()
15151523

15161524
async def ask_questions(self, args):
@@ -1542,6 +1550,7 @@ async def ask_questions(self, args):
15421550
llm_kwargs["thinking_budget"] = args.thinking_budget
15431551

15441552
def _ask_once(prompt: str) -> None:
1553+
query_start_time = time.time()
15451554
response = chat.ask(
15461555
prompt,
15471556
top_k=args.top_k,
@@ -1552,7 +1561,9 @@ def _ask_once(prompt: str) -> None:
15521561
pruning_strategy=args.pruning_strategy,
15531562
llm_kwargs=llm_kwargs,
15541563
)
1564+
query_completion_time = time.time() - query_start_time
15551565
print(f"LEANN: {response}")
1566+
print(f"The query took {query_completion_time:.3f} seconds to finish")
15561567

15571568
initial_query = (args.query or "").strip()
15581569

0 commit comments

Comments
 (0)