-
Notifications
You must be signed in to change notification settings - Fork 78
Show metadata size #3343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Show metadata size #3343
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1991,8 +1991,25 @@ def test_html_repr(self, ts): | |
| assert len(html) > 5000 | ||
| assert f"<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>" in html | ||
| assert f"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>" in html | ||
| for table in ts.tables.table_name_map: | ||
| assert f"<td>{table.capitalize()}</td>" in html | ||
| codecs = collections.defaultdict(int) | ||
| for table_name, table in ts.tables.table_name_map.items(): | ||
| assert f"<td>{table_name.capitalize()}</td>" in html | ||
| if hasattr(table, "metadata_schema"): | ||
| schema = table.metadata_schema.schema | ||
| codec = schema["codec"] if schema else "raw" | ||
| codecs[codec] += 1 | ||
| assert "<td>Metadata</td>" in html | ||
| assert "<th>Metadata</th>" in html | ||
| assert "<th>Metadata size</th>" in html | ||
| num_tables_with_metadata = 0 | ||
| for codec, count in codecs.items(): | ||
| assert html.count(f">{codec}</td>") == count | ||
| num_tables_with_metadata += count | ||
| # Only one table (provenances) has no metadata | ||
| assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1 | ||
| # All metadata tables should show the percentage metadata size | ||
| assert html.count("%)</td>") == num_tables_with_metadata | ||
|
|
||
| if ts.num_provenances > 0: | ||
| assert ( | ||
| f"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>" | ||
|
|
@@ -2027,8 +2044,21 @@ def test_str(self, ts): | |
| assert len(s) > 999 | ||
| assert re.search(rf"║Trees *│ *{ts.num_trees}║", s) | ||
| assert re.search(rf"║Time Units *│ *{ts.time_units}║", s) | ||
| for table in ts.tables.table_name_map: | ||
| assert re.search(rf"║{table.capitalize()} *│", s) | ||
| codecs = collections.defaultdict(int) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm very much negative on this style of testing now - any complex logic in a test case is to be avoided. I suggested testing these functions as follows This is much more useful long term as well as being easier to write and review. |
||
| for table_name, table in ts.tables.table_name_map.items(): | ||
| assert re.search(rf"║{table_name.capitalize()} *│", s) | ||
| if hasattr(table, "metadata_schema"): | ||
| schema = table.metadata_schema.schema | ||
| codec = schema["codec"] if schema else "raw" | ||
| codecs[codec] += 1 | ||
| num_tables_with_metadata = 0 | ||
| for codec, count in codecs.items(): | ||
| assert s.count(codec) == count | ||
| num_tables_with_metadata += count | ||
| # Only one table (provenances) has no metadata | ||
| assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1 | ||
| # All metadata tables should show the percentage metadata size | ||
| assert s.count("%)") == num_tables_with_metadata | ||
|
|
||
| @pytest.mark.skip("FIXME nbytes") | ||
| def test_nbytes(self, tmp_path, ts_fixture): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -534,17 +534,29 @@ def html_table(rows, *, header): | |
| """ | ||
|
|
||
|
|
||
| def metadata_codec(table): | ||
| if hasattr(table, "metadata_schema"): | ||
| schema = table.metadata_schema.schema | ||
| return "raw" if schema is None else schema.get("codec", "unknown") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like the tests don't check if this value is correct? (eg, no examples of "unknown" codec)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a good reason why logic like this should be spelled out longhand rather than aiming for maximally concise. Much easier to track test coverage.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not even sure that we can have a schema with no codec, but as this is display code, I thought it better not to fail here, and I didn't really want to dive into understanding what was and wasn't acceptable as a schema. Bu I guess I should just allow this to fail if there's no codec then. |
||
| return "" | ||
|
|
||
|
|
||
| def metadata_size(table): | ||
| if hasattr(table, "metadata"): | ||
| frac = len(table.metadata) / table.nbytes | ||
| return f"{naturalsize(len(table.metadata))} ({frac:.0%})" | ||
| return "" | ||
|
|
||
|
|
||
| def tree_sequence_html(ts): | ||
| table_rows = "".join( | ||
| f""" | ||
| <tr> | ||
| <td>{name.capitalize()}</td> | ||
| <td>{format_number(table.num_rows)}</td> | ||
| <td>{naturalsize(table.nbytes)}</td> | ||
| <td style="text-align: center;"> | ||
| {'✅' if hasattr(table, "metadata") and len(table.metadata) > 0 | ||
| else ''} | ||
| </td> | ||
| <td style="text-align: center;">{metadata_codec(table)}</td> | ||
| <td>{metadata_size(table)}</td> | ||
| </tr> | ||
| """ | ||
| for name, table in ts.tables.table_name_map.items() | ||
|
|
@@ -637,7 +649,8 @@ def tree_sequence_html(ts): | |
| <th style="line-height:21px;">Table</th> | ||
| <th>Rows</th> | ||
| <th>Size</th> | ||
| <th>Has Metadata</th> | ||
| <th>Metadata</th> | ||
| <th>Metadata size</th> | ||
| </tr> | ||
| </thead> | ||
| <tbody> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here could check if it shows 0% for tables without metadata? And that the percent is <= 100%?