Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
ID is e.g. a population name, rather than silently returning no samples.
(:user:`hyanwong`, :pr:`3344`)

**Features**

- Displaying a summary of the tree sequence now shows the metadata codec and
size of the metadata for each table. (:user:`hyanwong`, :pr:`3343`, :issue:`2637`)

--------------------
[1.0.0] - 2025-11-27
--------------------
Expand Down
38 changes: 34 additions & 4 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1991,8 +1991,25 @@ def test_html_repr(self, ts):
assert len(html) > 5000
assert f"<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>" in html
assert f"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>" in html
for table in ts.tables.table_name_map:
assert f"<td>{table.capitalize()}</td>" in html
codecs = collections.defaultdict(int)
for table_name, table in ts.tables.table_name_map.items():
assert f"<td>{table_name.capitalize()}</td>" in html
if hasattr(table, "metadata_schema"):
schema = table.metadata_schema.schema
codec = schema["codec"] if schema else "raw"
codecs[codec] += 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here could check if it shows 0% for tables without metadata? And that the percent is <= 100%?

assert "<td>Metadata</td>" in html
assert "<th>Metadata</th>" in html
assert "<th>Metadata size</th>" in html
num_tables_with_metadata = 0
for codec, count in codecs.items():
assert html.count(f">{codec}</td>") == count
num_tables_with_metadata += count
# Only one table (provenances) has no metadata
assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
# All metadata tables should show the percentage metadata size
assert html.count("%)</td>") == num_tables_with_metadata

if ts.num_provenances > 0:
assert (
f"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>"
Expand Down Expand Up @@ -2027,8 +2044,21 @@ def test_str(self, ts):
assert len(s) > 999
assert re.search(rf"║Trees *│ *{ts.num_trees}║", s)
assert re.search(rf"║Time Units *│ *{ts.time_units}║", s)
for table in ts.tables.table_name_map:
assert re.search(rf"║{table.capitalize()} *│", s)
codecs = collections.defaultdict(int)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm very much negative on this style of testing now - any complex logic in a test case is to be avoided.

I suggested testing these functions as follows

class TestMetadataCodecStr:
     def test_plain_table_collection(self):
             s = util.metadata_codec(tskit.TableCollection(1))
             assert s == "raw"

This is much more useful long term as well as being easier to write and review.

for table_name, table in ts.tables.table_name_map.items():
assert re.search(rf"║{table_name.capitalize()} *│", s)
if hasattr(table, "metadata_schema"):
schema = table.metadata_schema.schema
codec = schema["codec"] if schema else "raw"
codecs[codec] += 1
num_tables_with_metadata = 0
for codec, count in codecs.items():
assert s.count(codec) == count
num_tables_with_metadata += count
# Only one table (provenances) has no metadata
assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
# All metadata tables should show the percentage metadata size
assert s.count("%)") == num_tables_with_metadata

@pytest.mark.skip("FIXME nbytes")
def test_nbytes(self, tmp_path, ts_fixture):
Expand Down
9 changes: 3 additions & 6 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -4476,19 +4476,16 @@ def __str__(self):
["Sample Nodes", util.format_number(self.num_samples, sep=",")],
["Total Size", util.naturalsize(self.nbytes)],
]
header = ["Table", "Rows", "Size", "Has Metadata"]
header = ["Table", "Rows", "Size", "Metadata", "Metadata size"]
table_rows = []
for name, table in self.tables.table_name_map.items():
table_rows.append(
[
name.capitalize(),
f"{util.format_number(table.num_rows, sep=',')}",
util.naturalsize(table.nbytes),
(
"Yes"
if hasattr(table, "metadata") and len(table.metadata) > 0
else "No"
),
util.metadata_codec(table),
util.metadata_size(table),
]
)
return util.unicode_table(ts_rows, title="TreeSequence") + util.unicode_table(
Expand Down
23 changes: 18 additions & 5 deletions python/tskit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,17 +534,29 @@ def html_table(rows, *, header):
"""


def metadata_codec(table):
if hasattr(table, "metadata_schema"):
schema = table.metadata_schema.schema
return "raw" if schema is None else schema.get("codec", "unknown")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the tests don't check if this value is correct? (eg, no examples of "unknown" codec)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good reason why logic like this should be spelled out longhand rather than aiming for maximally concise. Much easier to track test coverage.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not even sure that we can have a schema with no codec, but as this is display code, I thought it better not to fail here, and I didn't really want to dive into understanding what was and wasn't acceptable as a schema. Bu I guess I should just allow this to fail if there's no codec then.

return ""


def metadata_size(table):
if hasattr(table, "metadata"):
frac = len(table.metadata) / table.nbytes
return f"{naturalsize(len(table.metadata))} ({frac:.0%})"
return ""


def tree_sequence_html(ts):
table_rows = "".join(
f"""
<tr>
<td>{name.capitalize()}</td>
<td>{format_number(table.num_rows)}</td>
<td>{naturalsize(table.nbytes)}</td>
<td style="text-align: center;">
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
else ''}
</td>
<td style="text-align: center;">{metadata_codec(table)}</td>
<td>{metadata_size(table)}</td>
</tr>
"""
for name, table in ts.tables.table_name_map.items()
Expand Down Expand Up @@ -637,7 +649,8 @@ def tree_sequence_html(ts):
<th style="line-height:21px;">Table</th>
<th>Rows</th>
<th>Size</th>
<th>Has Metadata</th>
<th>Metadata</th>
<th>Metadata size</th>
</tr>
</thead>
<tbody>
Expand Down