diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index d11a2a0e2d..a4db43367b 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -8,6 +8,11 @@
ID is e.g. a population name, rather than silently returning no samples.
(:user:`hyanwong`, :pr:`3344`)
+**Features**
+
+- Displaying a summary of the tree sequence now shows the metadata codec and
+ size of the metadata for each table. (:user:`hyanwong`, :pr:`3343`, :issue:`2637`)
+
--------------------
[1.0.0] - 2025-11-27
--------------------
diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py
index 9ce5a928d2..b2f5e90e45 100644
--- a/python/tests/test_highlevel.py
+++ b/python/tests/test_highlevel.py
@@ -1991,8 +1991,25 @@ def test_html_repr(self, ts):
assert len(html) > 5000
assert f"
| Trees | {ts.num_trees:,} |
" in html
assert f"| Time Units | {ts.time_units} |
" in html
- for table in ts.tables.table_name_map:
- assert f"{table.capitalize()} | " in html
+ codecs = collections.defaultdict(int)
+ for table_name, table in ts.tables.table_name_map.items():
+ assert f"{table_name.capitalize()} | " in html
+ if hasattr(table, "metadata_schema"):
+ schema = table.metadata_schema.schema
+ codec = schema["codec"] if schema else "raw"
+ codecs[codec] += 1
+ assert "Metadata | " in html
+ assert "Metadata | " in html
+ assert "Metadata size | " in html
+ num_tables_with_metadata = 0
+ for codec, count in codecs.items():
+ assert html.count(f">{codec}") == count
+ num_tables_with_metadata += count
+ # Only one table (provenances) has no metadata
+ assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
+ # All metadata tables should show the percentage metadata size
+ assert html.count("%)") == num_tables_with_metadata
+
if ts.num_provenances > 0:
assert (
f"{json.loads(ts.provenance(0).record)['software']['name']} | "
@@ -2027,8 +2044,21 @@ def test_str(self, ts):
assert len(s) > 999
assert re.search(rf"║Trees *│ *{ts.num_trees}║", s)
assert re.search(rf"║Time Units *│ *{ts.time_units}║", s)
- for table in ts.tables.table_name_map:
- assert re.search(rf"║{table.capitalize()} *│", s)
+ codecs = collections.defaultdict(int)
+ for table_name, table in ts.tables.table_name_map.items():
+ assert re.search(rf"║{table_name.capitalize()} *│", s)
+ if hasattr(table, "metadata_schema"):
+ schema = table.metadata_schema.schema
+ codec = schema["codec"] if schema else "raw"
+ codecs[codec] += 1
+ num_tables_with_metadata = 0
+ for codec, count in codecs.items():
+ assert s.count(codec) == count
+ num_tables_with_metadata += count
+ # Only one table (provenances) has no metadata
+ assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
+ # All metadata tables should show the percentage metadata size
+ assert s.count("%)") == num_tables_with_metadata
@pytest.mark.skip("FIXME nbytes")
def test_nbytes(self, tmp_path, ts_fixture):
diff --git a/python/tskit/trees.py b/python/tskit/trees.py
index 7775231dde..ec8913c3b5 100644
--- a/python/tskit/trees.py
+++ b/python/tskit/trees.py
@@ -4476,7 +4476,7 @@ def __str__(self):
["Sample Nodes", util.format_number(self.num_samples, sep=",")],
["Total Size", util.naturalsize(self.nbytes)],
]
- header = ["Table", "Rows", "Size", "Has Metadata"]
+ header = ["Table", "Rows", "Size", "Metadata", "Metadata size"]
table_rows = []
for name, table in self.tables.table_name_map.items():
table_rows.append(
@@ -4484,11 +4484,8 @@ def __str__(self):
name.capitalize(),
f"{util.format_number(table.num_rows, sep=',')}",
util.naturalsize(table.nbytes),
- (
- "Yes"
- if hasattr(table, "metadata") and len(table.metadata) > 0
- else "No"
- ),
+ util.metadata_codec(table),
+ util.metadata_size(table),
]
)
return util.unicode_table(ts_rows, title="TreeSequence") + util.unicode_table(
diff --git a/python/tskit/util.py b/python/tskit/util.py
index 64fad3f423..2f19256533 100644
--- a/python/tskit/util.py
+++ b/python/tskit/util.py
@@ -534,6 +534,20 @@ def html_table(rows, *, header):
"""
+def metadata_codec(table):
+    """
+    Return the name of the metadata codec declared by ``table``, for use in
+    tree sequence summaries.
+
+    The result is ``"raw"`` when the table's metadata schema is ``None``,
+    ``"unknown"`` when a schema is present but has no ``"codec"`` key, and
+    the empty string when the table has no ``metadata_schema`` attribute.
+    """
+    if not hasattr(table, "metadata_schema"):
+        return ""
+    schema_dict = table.metadata_schema.schema
+    if schema_dict is None:
+        return "raw"
+    return schema_dict.get("codec", "unknown")
+
+
+def metadata_size(table):
+    """
+    Return a display string giving the size of the metadata column of
+    ``table`` followed, in parentheses, by the percentage of the table's
+    total ``nbytes`` that the metadata accounts for.
+
+    Returns the empty string when the table has no ``metadata`` attribute.
+    """
+    if not hasattr(table, "metadata"):
+        return ""
+    # Read the metadata column once and reuse its length for both the
+    # formatted size and the fraction.
+    metadata_bytes = len(table.metadata)
+    total_bytes = table.nbytes
+    # Guard the division so that summarising a table reporting zero bytes
+    # shows 0% instead of raising ZeroDivisionError.
+    frac = metadata_bytes / total_bytes if total_bytes > 0 else 0.0
+    return f"{naturalsize(metadata_bytes)} ({frac:.0%})"
+
+
def tree_sequence_html(ts):
table_rows = "".join(
f"""
@@ -541,10 +555,8 @@ def tree_sequence_html(ts):
{name.capitalize()} |
{format_number(table.num_rows)} |
{naturalsize(table.nbytes)} |
-
- {'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
- else ''}
- |
+ {metadata_codec(table)} |
+ {metadata_size(table)} |
"""
for name, table in ts.tables.table_name_map.items()
@@ -637,7 +649,8 @@ def tree_sequence_html(ts):
Table |
Rows |
Size |
- Has Metadata |
+ Metadata |
+ Metadata size |