Skip to content

Commit 818623b

Browse files
authored
Merge pull request #330 from iiasa/project/ssp/transport/2025-04-08
Two adjustments to `.project.ssp.transport`
2 parents 5e6bf56 + 2983aee commit 818623b

File tree

5 files changed

+117
-66
lines changed

5 files changed

+117
-66
lines changed

doc/whatsnew.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ SSP :ref:`ssp-2024`/ScenarioMIP
4444

4545
Improve :mod:`.ssp.transport`:
4646

47-
- Add :func:`~.ssp.transport.method_B` and make this the default (:pull:`259`).
48-
- Add :func:`~.ssp.transport.method_C` (:issue:`305`, :pull:`325`).
49-
- Add :func:`~.ssp.transport.process_df` (:pull:`303`).
47+
- Add :func:`~.ssp.transport.method_B` and make this the default (:pull:`259`, :pull:`330`).
48+
- Add :func:`~.ssp.transport.method_C` (:issue:`305`, :pull:`325`, :pull:`330`).
49+
- Add :func:`~.ssp.transport.process_df` (:pull:`303`);
50+
handle data frames containing :py:`np.NaN` (:pull:`330`).
5051
- Adapt to revised ‘variable’ codes (:pull:`309`, :issue:`304`).
5152

5253
Transport

message_ix_models/model/transport/operator.py

+1-39
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
import re
5-
from collections.abc import Mapping, Sequence
5+
from collections.abc import Sequence
66
from functools import partial
77
from itertools import product
88
from operator import gt, le, lt
@@ -26,7 +26,6 @@
2626
MappingAdapter,
2727
datetime_now_with_tz,
2828
minimum_version,
29-
nodes_ex_world,
3029
show_versions,
3130
)
3231
from message_ix_models.util.genno import as_quantity
@@ -71,8 +70,6 @@
7170
"max",
7271
"maybe_select",
7372
"min",
74-
"nodes_ex_world", # Re-export from message_ix_models.util TODO do this upstream
75-
"nodes_world_agg",
7673
"price_units",
7774
"quantity_from_config",
7875
"relabel2",
@@ -832,41 +829,6 @@ def indexers_usage(technologies: list[Code]) -> dict:
832829
}
833830

834831

835-
def nodes_world_agg(config, dim: Hashable = "nl") -> dict[Hashable, Mapping]:
836-
"""Mapping to aggregate e.g. nl="World" from values for child nodes of "World".
837-
838-
This mapping should be used with :func:`.genno.operator.aggregate`, giving the
839-
argument ``keep=False``. It includes 1:1 mapping from each region name to itself.
840-
841-
.. todo:: move to :mod:`message_ix_models.report.operator`.
842-
"""
843-
result = {}
844-
845-
cl = get_codelist(f"node/{config['regions']}")
846-
for n in cl:
847-
# "World" node should have be top-level (its parent is the `cl` itself) and have
848-
# some children. Countries (from pycountry) that are omitted from a mapping have
849-
# no children.
850-
if n.parent is cl and len(n.child):
851-
name = str(n)
852-
853-
# FIXME Remove. This is a hack to suit the legacy reporting, which expects
854-
# global aggregates at *_GLB rather than "World".
855-
new_name = f"{config['regions']}_GLB"
856-
log.info(f"Aggregates for {n!r} will be labelled {new_name!r}")
857-
name = new_name
858-
859-
# Global total as aggregate of child nodes
860-
result = {name: list(map(str, n.child))}
861-
862-
# Also add "no-op" aggregates e.g. "R12_AFR" is the sum of ["R12_AFR"]
863-
result.update({c: [c] for c in map(str, n.child)})
864-
865-
return {dim: result}
866-
867-
raise RuntimeError("Failed to identify the World node")
868-
869-
870832
def price_units(qty: "AnyQuantity") -> "AnyQuantity":
871833
"""Forcibly adjust price units, if necessary."""
872834
target = "USD_2010 / km"

message_ix_models/project/ssp/transport.py

+36-16
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def get_computer(
265265
context = Context(model=ModelConfig(regions="R12"))
266266
# Store in `c` for reference by other operations
267267
c.add("context", context)
268+
c.graph["config"].update(regions="R12")
268269

269270
# Store a model name and scenario name from a single row of the data
270271
model_name, scenario_name = row0[["Model", "Scenario"]]
@@ -413,11 +414,15 @@ def method_B(c: "Computer") -> None:
413414
c.add(fe.iea[1], "aggregate", fe.iea[0], g, keep=False)
414415

415416
# Rename dimensions
416-
c.add(fe.cnt, "rename_dims", fe.iea[1], name_dict=dict(flow="t", product="c"))
417+
c.add(fe.cnt[0], "rename_dims", fe.iea[1], name_dict=dict(flow="t", product="c"))
417418

418-
# Compute ratio
419-
c.add(fe.share[0], "select", fe.cnt, indexers=dict(t="_1"), drop=True)
420-
c.add(fe.share[1], "select", fe.cnt, indexers=dict(t="_2"), drop=True)
419+
# Global total
420+
c.add("n::world agg", "nodes_world_agg", "config", dim="n", name=None)
421+
c.add(fe.cnt[1], "aggregate", fe.cnt[0], "n::world agg", keep=False)
422+
423+
# Ratio of _1 (DOMESAIR - AVBUNK) to _2 (TOTTRANS - AVBUNK)
424+
c.add(fe.share[0], "select", fe.cnt[1], indexers=dict(t="_1"), drop=True)
425+
c.add(fe.share[1], "select", fe.cnt[1], indexers=dict(t="_2"), drop=True)
421426
c.add(fe.share, "div", fe.share[0], fe.share[1])
422427

423428
# Prepare remaining calculations
@@ -467,11 +472,11 @@ def method_BC_common(c: "Computer", k_fe_share: "Key") -> None:
467472

468473
# Relabel:
469474
# - c[ommodity]: 'Liquids|Oil' (IAMC 'variable' component) → 'lightoil'
470-
# - n[ode]: 'AFR''R12_AFR' etc.
471-
labels = dict(
472-
c={"Liquids|Oil": "lightoil"},
473-
n={n.id.partition("_")[2]: n.id for n in get_codelist("node/R12")},
474-
)
475+
# - n[ode]: "AFR""R12_AFR" etc. "World" is not changed.
476+
cl = get_codelist("node/R12")
477+
labels = dict(c={"Liquids|Oil": "lightoil"}, n={})
478+
for n in filter(lambda n: len(n.child) and n.id != "World", cl):
479+
labels["n"][n.id.partition("_")[2]] = n.id
475480
c.add(k.fe_in[2] / "UNIT", "relabel", k.fe_in[1] / "UNIT", labels=labels)
476481

477482
### Compute estimate of emissions
@@ -496,7 +501,7 @@ def method_BC_common(c: "Computer", k_fe_share: "Key") -> None:
496501
c.add(k.units, e_UNIT, "e::codelist")
497502
c.add(K.emi[2], "mul", k.emi0[1], k.units, K.bcast)
498503

499-
# Change labels: restore e.g. "AFR" given "R12_AFR"
504+
# Restore labels: "R12_AFR" → "AFR" etc. "World" is not changed.
500505
labels = dict(n={v: k for k, v in labels["n"].items()})
501506
c.add(K.emi, "relabel", K.emi[2], labels=labels)
502507

@@ -527,15 +532,20 @@ def method_C(c: "Computer") -> None:
527532
# Prepare `c` to compute the final energy share for aviation
528533
k = Keys(
529534
# Added by .transport.base.prepare_reporter()
530-
base="in:nl-t-ya-c:transport+units+0",
535+
base="in:nl-t-ya-c:transport+units",
531536
share0=f"fe share:c-nl-ya:{L}",
532537
share1=f"fe share:c-n-y:{L}",
533538
)
534539

540+
# Relabel "R12_GLB" (added by .report.transport.aggregate()) to "World"
541+
labels = {"nl": {"R12_GLB": "World"}}
542+
c.add(k.base[1], "relabel", k.base[0], labels=labels, sums=True)
543+
535544
# Select the numerator
536-
c.add(k.share0["num"], "select", k.base, indexers=dict(t=["AIR"]), drop=True)
537-
# Compute the ratio
538-
c.add(k.share0, "div", k.share0["num"], k.base / "t")
545+
c.add(k.share0["num"], "select", k.base[1], indexers=dict(t=["AIR"]), drop=True)
546+
# Ratio of AIR to the total
547+
c.add(k.share0, "div", k.share0["num"], k.base[1] / "t")
548+
539549
# Rename dimensions as expected by method_BC_common
540550
c.add(k.share1, "rename_dims", k.share0, name_dict={"nl": "n", "ya": "y"})
541551

@@ -559,8 +569,18 @@ def process_df(
559569
# Prepare all other tasks
560570
c = get_computer(data.iloc[0, :], method, platform_name=platform_name)
561571

562-
# Input data: convert `data` to a Quantity with the appropriate structure
563-
c.add(K.input, to_quantity, data, **IAMC_KW)
572+
def fillna(df: pd.DataFrame) -> pd.DataFrame:
573+
"""Replace :py:`np.nan` with 0.0 in certain rows and columns."""
574+
mask = df.Variable.str.fullmatch(
575+
r"Emissions\|[^\|]+\|Energy\|Demand\|(Bunkers|Transportation).*"
576+
)
577+
to_fill = {c: 0.0 for c in df.columns if str(c).isnumeric() and int(c) >= 2020}
578+
return df.where(~mask, df.fillna(to_fill))
579+
580+
# Input data: replace NaN with 0
581+
c.add(K.input[0], fillna, data)
582+
# Convert `data` to a Quantity with the appropriate structure
583+
c.add(K.input, to_quantity, K.input[0], **IAMC_KW)
564584

565585
# Compute and return the result
566586
return c.get("target")

message_ix_models/report/operator.py

+38-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
import re
5-
from collections.abc import Callable, Hashable, Mapping, Sequence
5+
from collections.abc import Callable, Hashable, Mapping, MutableMapping, Sequence
66
from functools import reduce
77
from itertools import filterfalse, product
88
from typing import TYPE_CHECKING, Any, Optional, Union
@@ -48,6 +48,7 @@ def __lt__(self, __other: Any) -> bool: ...
4848
"merge_data",
4949
"model_periods",
5050
"nodes_ex_world",
51+
"nodes_world_agg",
5152
"quantity_from_iamc",
5253
"remove_ts",
5354
"select_expand",
@@ -244,6 +245,42 @@ def model_periods(y: list[int], cat_year: pd.DataFrame) -> list[int]:
244245
return list(filter(lambda year: y0 <= year, y))
245246

246247

248+
def nodes_world_agg(
249+
config: dict, *, dim: Hashable = "nl", name: Optional[str] = "{}_GLB"
250+
) -> Mapping[Hashable, Mapping[Hashable, list[str]]]:
251+
"""Mapping to aggregate e.g. nl="World" from values for child nodes of "World".
252+
253+
This mapping should be used with :func:`.genno.operator.aggregate`, giving the
254+
argument ``keep=False``. It includes 1:1 mapping from each region name to itself.
255+
"""
256+
from message_ix_models.model.structure import get_codelist
257+
258+
cl = get_codelist(f"node/{config['regions']}")
259+
260+
# "World" node should have be top-level (its parent is the `cl` itself) and have
261+
# some children. Countries (from pycountry) that are omitted from a mapping have no
262+
# children.
263+
try:
264+
node = next(filter(lambda n: n.parent is cl and len(n.child), cl))
265+
except StopIteration:
266+
raise RuntimeError("Failed to identify a 'World' node")
267+
268+
if name:
269+
# FIXME Remove. This is a hack to suit the legacy reporting, which expects
270+
# global aggregates at *_GLB rather than "World".
271+
name = name.format(config["regions"])
272+
log.info(f"Aggregates for {node!r} will be labelled {name!r}")
273+
else:
274+
name = node.id
275+
276+
# Global total as aggregate of child nodes
277+
result: MutableMapping = {name: list(map(str, node.child))}
278+
# Also add "no-op" aggregates e.g. "R12_AFR" is the sum of ["R12_AFR"]
279+
result.update({c: [c] for c in map(str, node.child)})
280+
281+
return {dim: result}
282+
283+
247284
def remove_ts(
248285
scenario: ixmp.Scenario,
249286
config: Optional[dict] = None,

message_ix_models/tests/project/ssp/test_transport.py

+38-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from collections.abc import Callable, Hashable
12
from typing import TYPE_CHECKING
23

4+
import numpy as np
35
import pandas as pd
46
import pytest
57

@@ -98,24 +100,31 @@ def _to_long(df):
98100

99101
# Diff data:
100102
# - Outer merge.
103+
# - Fill NaNs resulting from insert_nans()
101104
# - Compute diff and select rows where diff is larger than a certain value
102-
df = df_in.merge(df_out, how="outer", on=dims, suffixes=("_in", "_out")).query(
103-
"abs(value_out - value_in) > 1e-16"
105+
df = (
106+
df_in.merge(df_out, how="outer", on=dims, suffixes=("_in", "_out"))
107+
.fillna(0)
108+
.query("abs(value_out - value_in) > 1e-16")
104109
)
105110

106111
# Identify the directory from which IEA EWEB data is read
107112
iea_eweb_dir = web.dir_fallback(web.FILES[("IEA", "2024")][0])
108113
# True if the fuzzed test data are being used
109114
iea_eweb_test_data = iea_eweb_dir.match("message_ix_models/data/test/iea/web")
110115

116+
# All regions and "World" have modified values
117+
N_reg = {METHOD.A: 13, METHOD.B: 9, METHOD.C: 13}[method]
118+
assert N_reg <= len(df["Region"].unique())
119+
111120
# Number of modified values
112121
N_exp = {
113122
(METHOD.A, False): 10280,
114123
(METHOD.A, True): 10280,
115-
(METHOD.B, False): 4660,
116-
(METHOD.B, True): 3060,
117-
(METHOD.C, False): 3220,
118-
(METHOD.C, True): 3220,
124+
(METHOD.B, False): 5060,
125+
(METHOD.B, True): 3460,
126+
(METHOD.C, False): 3500,
127+
(METHOD.C, True): 3500,
119128
}[(method, iea_eweb_test_data)]
120129

121130
if N_exp != len(df):
@@ -155,6 +164,22 @@ def expected_variables(flag: int, method: METHOD) -> set[str]:
155164
return result
156165

157166

167+
def insert_nans(
168+
df: pd.DataFrame, variable_expr: str, year_cond: Callable[[Hashable], bool]
169+
) -> pd.DataFrame:
170+
"""Replace zeros with :py:`np.nan` in `df`.
171+
172+
This occurs only where:
173+
174+
1. The 'Variable' column contains a string that matches `variable_expr`.
175+
2. The `year_cond` returns :any:`True` for the column name.
176+
"""
177+
return df.where(
178+
~df.Variable.str.fullmatch(variable_expr),
179+
df.replace({c: {0: np.nan} for c in filter(year_cond, df.columns)}),
180+
)
181+
182+
158183
@get_computer.minimum_version
159184
def test_cli(tmp_path, mix_models_cli, test_context, input_xlsx_path) -> None:
160185
"""Code can be invoked from the command-line."""
@@ -218,7 +243,13 @@ def test_get_scenario_code(expected_id, model_name, scenario_name) -> None:
218243
@get_computer.minimum_version
219244
@pytest.mark.parametrize("method", METHOD_PARAM)
220245
def test_process_df(test_context, input_csv_path, method) -> None:
221-
df_in = pd.read_csv(input_csv_path)
246+
# - Read input data
247+
# - Replace some 0 values with np.nan to replicate conditions in calling code.
248+
df_in = pd.read_csv(input_csv_path).pipe(
249+
insert_nans,
250+
r"Emissions\|.*\|International Aviation",
251+
lambda c: str(c).isnumeric() and int(c) >= 2020,
252+
)
222253

223254
# Code runs
224255
df_out = process_df(df_in, method=method)

0 commit comments

Comments
 (0)