diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index d11a2a0e2d..a4db43367b 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -8,6 +8,11 @@ ID is e.g. a population name, rather than silently returning no samples. (:user:`hyanwong`, :pr:`3344`) +**Features** + +- Displaying a summary of the tree sequence now shows the metadata codec and + size of the metadata for each table. (:user:`hyanwong`, :pr:`3343`, :issue:`2637`) + -------------------- [1.0.0] - 2025-11-27 -------------------- diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 9ce5a928d2..b2f5e90e45 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1991,8 +1991,25 @@ def test_html_repr(self, ts): assert len(html) > 5000 assert f"Trees{ts.num_trees:,}" in html assert f"Time Units{ts.time_units}" in html - for table in ts.tables.table_name_map: - assert f"{table.capitalize()}" in html + codecs = collections.defaultdict(int) + for table_name, table in ts.tables.table_name_map.items(): + assert f"{table_name.capitalize()}" in html + if hasattr(table, "metadata_schema"): + schema = table.metadata_schema.schema + codec = schema["codec"] if schema else "raw" + codecs[codec] += 1 + assert "Metadata" in html + assert "Metadata" in html + assert "Metadata size" in html + num_tables_with_metadata = 0 + for codec, count in codecs.items(): + assert html.count(f">{codec}") == count + num_tables_with_metadata += count + # Only one table (provenances) has no metadata + assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1 + # All metadata tables should show the percentage metadata size + assert html.count("%)") == num_tables_with_metadata + if ts.num_provenances > 0: assert ( f"{json.loads(ts.provenance(0).record)['software']['name']}" @@ -2027,8 +2044,21 @@ def test_str(self, ts): assert len(s) > 999 assert re.search(rf"║Trees *│ *{ts.num_trees}║", s) assert re.search(rf"║Time Units *│ *{ts.time_units}║", s) - for table in ts.tables.table_name_map: - assert re.search(rf"║{table.capitalize()} *│", s) + codecs = collections.defaultdict(int) + for table_name, table in ts.tables.table_name_map.items(): + assert re.search(rf"║{table_name.capitalize()} *│", s) + if hasattr(table, "metadata_schema"): + schema = table.metadata_schema.schema + codec = schema["codec"] if schema else "raw" + codecs[codec] += 1 + num_tables_with_metadata = 0 + for codec, count in codecs.items(): + assert s.count(codec) == count + num_tables_with_metadata += count + # Only one table (provenances) has no metadata + assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1 + # All metadata tables should show the percentage metadata size + assert s.count("%)") == num_tables_with_metadata @pytest.mark.skip("FIXME nbytes") def test_nbytes(self, tmp_path, ts_fixture): diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 7775231dde..ec8913c3b5 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -4476,7 +4476,7 @@ def __str__(self): ["Sample Nodes", util.format_number(self.num_samples, sep=",")], ["Total Size", util.naturalsize(self.nbytes)], ] - header = ["Table", "Rows", "Size", "Has Metadata"] + header = ["Table", "Rows", "Size", "Metadata", "Metadata size"] table_rows = [] for name, table in self.tables.table_name_map.items(): table_rows.append( @@ -4484,11 +4484,8 @@ def __str__(self): name.capitalize(), f"{util.format_number(table.num_rows, sep=',')}", util.naturalsize(table.nbytes), - ( - "Yes" - if hasattr(table, "metadata") and len(table.metadata) > 0 - else "No" - ), + util.metadata_codec(table), + util.metadata_size(table), ] ) return util.unicode_table(ts_rows, title="TreeSequence") + util.unicode_table( diff --git a/python/tskit/util.py b/python/tskit/util.py index 64fad3f423..2f19256533 100644 --- a/python/tskit/util.py +++ b/python/tskit/util.py @@ -534,6 +534,20 @@ def html_table(rows, *, header): """ +def metadata_codec(table): + if hasattr(table, "metadata_schema"): + schema = table.metadata_schema.schema + return "raw" if schema is None else schema.get("codec", "unknown") + return "" + + +def metadata_size(table): + if hasattr(table, "metadata"): + frac = len(table.metadata) / table.nbytes + return f"{naturalsize(len(table.metadata))} ({frac:.0%})" + return "" + + def tree_sequence_html(ts): table_rows = "".join( f""" @@ -541,10 +555,8 @@ def tree_sequence_html(ts): {name.capitalize()} {format_number(table.num_rows)} {naturalsize(table.nbytes)} - - {'✅' if hasattr(table, "metadata") and len(table.metadata) > 0 - else ''} - + {metadata_codec(table)} + {metadata_size(table)} """ for name, table in ts.tables.table_name_map.items() @@ -637,7 +649,8 @@ def tree_sequence_html(ts): Table Rows Size - Has Metadata + Metadata + Metadata size