Skip to content

Commit 50bb20f

Browse files
committed
fix
1 parent a82d186 commit 50bb20f

File tree

3 files changed: 27 additions and 1 deletion.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "fix csv reader"
4+
}

packages/graphrag-input/graphrag_input/csv.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""A module containing 'CSVFileReader' model."""
55

66
import csv
7+
import io
78
import logging
89
import sys
910

@@ -39,6 +40,6 @@ async def read_file(self, path: str) -> list[TextDocument]:
3940
"""
4041
file = await self._storage.get(path, encoding=self._encoding)
4142

42-
reader = csv.DictReader(file.splitlines())
43+
reader = csv.DictReader(io.StringIO(file))
4344
rows = list(reader)
4445
return await self.process_data_columns(rows, path)

tests/unit/indexing/input/test_csv_loader.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,24 @@ async def test_csv_loader_multiple_files():
5454
reader = create_input_reader(config, storage)
5555
documents = await reader.read_files()
5656
assert len(documents) == 4
57+
58+
59+
async def test_csv_loader_preserves_multiline_fields():
60+
"""Multiline quoted CSV fields must retain their internal newlines."""
61+
config = InputConfig(
62+
type=InputType.Csv,
63+
text_column="text",
64+
title_column="title",
65+
)
66+
storage = create_storage(
67+
StorageConfig(
68+
base_dir="tests/unit/indexing/input/data/multiline-csv",
69+
)
70+
)
71+
reader = create_input_reader(config, storage)
72+
documents = await reader.read_files()
73+
assert len(documents) == 2
74+
assert documents[0].title == "Post 1"
75+
assert documents[0].text == "Line one.\nLine two.\nLine three."
76+
assert documents[1].title == "Post 2"
77+
assert documents[1].text == "Single line."

0 commit comments

Comments
 (0)