GhentAnalysis
diff --git a/‎.all-contributorsrc‎
Lines changed: 15 additions & 3 deletions b/‎.all-contributorsrc‎
Lines changed: 15 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 2 deletions b/‎README.md‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎analysis_templates/cms_minimal/__cf_module_name__/plotting/example.py‎
Lines changed: 9 additions & 4 deletions b/‎analysis_templates/cms_minimal/__cf_module_name__/plotting/example.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎analysis_templates/cms_minimal/law.cfg‎
Lines changed: 25 additions & 12 deletions b/‎analysis_templates/cms_minimal/law.cfg‎
Lines changed: 25 additions & 12 deletions
diff --git a/‎analysis_templates/ghent_template/__cf_module_name__/plotting/example.py‎
Lines changed: 9 additions & 4 deletions b/‎analysis_templates/ghent_template/__cf_module_name__/plotting/example.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎analysis_templates/ghent_template/__cf_module_name__/production/default.py‎
Lines changed: 4 additions & 2 deletions b/‎analysis_templates/ghent_template/__cf_module_name__/production/default.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎analysis_templates/ghent_template/__cf_module_name__/selection/default.py‎
Lines changed: 8 additions & 2 deletions b/‎analysis_templates/ghent_template/__cf_module_name__/selection/default.py‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎analysis_templates/ghent_template/__cf_module_name__/selection/objects.py‎
Lines changed: 4 additions & 4 deletions b/‎analysis_templates/ghent_template/__cf_module_name__/selection/objects.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎bin/cf_inspect‎
Lines changed: 11 additions & 1 deletion b/‎bin/cf_inspect‎
Lines changed: 11 additions & 1 deletion
@@ -71,7 +71,8 @@
       "profile": "https://github.com/Bogdan-Wiederspan",
       "contributions": [
         "code",
-        "test"
+        "test",
+        "review"
       ]
     },
     {
@@ -153,16 +154,27 @@
       "avatar_url": "https://avatars.githubusercontent.com/u/99343616?v=4",
       "profile": "https://github.com/aalvesan",
       "contributions": [
-        "code"
+        "code",
+        "review"
       ]
-    }, {
+    },
+    {
       "login": "philippgadow",
       "name": "philippgadow",
       "avatar_url": "https://avatars.githubusercontent.com/u/6804366?v=4",
       "profile": "https://github.com/philippgadow",
       "contributions": [
         "code"
       ]
+    },
+    {
+      "login": "LuSchaller",
+      "name": "Lukas Schaller",
+      "avatar_url": "https://avatars.githubusercontent.com/u/30951523?v=4",
+      "profile": "https://github.com/LuSchaller",
+      "contributions": [
+        "code"
+      ]
     }
   ],
   "commitType": "docs"
 
@@ -138,7 +138,7 @@ For a better overview of the tasks that are triggered by the commands below, che
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/dsavoiu"><img src="https://avatars.githubusercontent.com/u/17005255?v=4?s=100" width="100px;" alt="Daniel Savoiu"/><br /><sub><b>Daniel Savoiu</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=dsavoiu" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/pulls?q=is%3Apr+reviewed-by%3Adsavoiu" title="Reviewed Pull Requests">👀</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/pkausw"><img src="https://avatars.githubusercontent.com/u/26219567?v=4?s=100" width="100px;" alt="pkausw"/><br /><sub><b>pkausw</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=pkausw" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/pulls?q=is%3Apr+reviewed-by%3Apkausw" title="Reviewed Pull Requests">👀</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/nprouvost"><img src="https://avatars.githubusercontent.com/u/49162277?v=4?s=100" width="100px;" alt="nprouvost"/><br /><sub><b>nprouvost</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=nprouvost" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/commits?author=nprouvost" title="Tests">⚠️</a></td>
-      <td align="center" valign="top" width="14.28%"><a href="https://github.com/Bogdan-Wiederspan"><img src="https://avatars.githubusercontent.com/u/79155113?v=4?s=100" width="100px;" alt="Bogdan-Wiederspan"/><br /><sub><b>Bogdan-Wiederspan</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=Bogdan-Wiederspan" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/commits?author=Bogdan-Wiederspan" title="Tests">⚠️</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/Bogdan-Wiederspan"><img src="https://avatars.githubusercontent.com/u/79155113?v=4?s=100" width="100px;" alt="Bogdan-Wiederspan"/><br /><sub><b>Bogdan-Wiederspan</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=Bogdan-Wiederspan" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/commits?author=Bogdan-Wiederspan" title="Tests">⚠️</a> <a href="https://github.com/columnflow/columnflow/pulls?q=is%3Apr+reviewed-by%3ABogdan-Wiederspan" title="Reviewed Pull Requests">👀</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/kramerto"><img src="https://avatars.githubusercontent.com/u/18616159?v=4?s=100" width="100px;" alt="Tobias Kramer"/><br /><sub><b>Tobias Kramer</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=kramerto" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/pulls?q=is%3Apr+reviewed-by%3Akramerto" title="Reviewed Pull Requests">👀</a></td>
     </tr>
     <tr>
@@ -151,8 +151,9 @@ For a better overview of the tasks that are triggered by the commands below, che
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/JulesVandenbroeck"><img src="https://avatars.githubusercontent.com/u/93740577?v=4?s=100" width="100px;" alt="JulesVandenbroeck"/><br /><sub><b>JulesVandenbroeck</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=JulesVandenbroeck" title="Code">💻</a></td>
     </tr>
     <tr>
-      <td align="center" valign="top" width="14.28%"><a href="https://github.com/aalvesan"><img src="https://avatars.githubusercontent.com/u/99343616?v=4?s=100" width="100px;" alt="Ana Andrade"/><br /><sub><b>Ana Andrade</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=aalvesan" title="Code">💻</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/aalvesan"><img src="https://avatars.githubusercontent.com/u/99343616?v=4?s=100" width="100px;" alt="Ana Andrade"/><br /><sub><b>Ana Andrade</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=aalvesan" title="Code">💻</a> <a href="https://github.com/columnflow/columnflow/pulls?q=is%3Apr+reviewed-by%3Aaalvesan" title="Reviewed Pull Requests">👀</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/philippgadow"><img src="https://avatars.githubusercontent.com/u/6804366?v=4?s=100" width="100px;" alt="philippgadow"/><br /><sub><b>philippgadow</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=philippgadow" title="Code">💻</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/LuSchaller"><img src="https://avatars.githubusercontent.com/u/30951523?v=4?s=100" width="100px;" alt="Lukas Schaller"/><br /><sub><b>Lukas Schaller</b></sub></a><br /><a href="https://github.com/columnflow/columnflow/commits?author=LuSchaller" title="Code">💻</a></td>
     </tr>
   </tbody>
 </table>
 
@@ -14,14 +14,16 @@
     apply_variable_settings,
     apply_process_settings,
 )
+from columnflow.types import TYPE_CHECKING
 
-hist = maybe_import("hist")
 np = maybe_import("numpy")
-mpl = maybe_import("matplotlib")
-plt = maybe_import("matplotlib.pyplot")
-mplhep = maybe_import("mplhep")
 od = maybe_import("order")
 
+# import hist, matplotlib, etc. for type checking only like this, and import them locally where they are used
+if TYPE_CHECKING:
+    hist = maybe_import("hist")
+    plt = maybe_import("matplotlib.pyplot")
+
 
 def my_plot1d_func(
     hists: OrderedDict[od.Process, hist.Hist],
@@ -45,6 +47,9 @@ def my_plot1d_func(
             --plot-function __cf_module_name__.plotting.example.my_plot1d_func \
             --general-settings example_param=some_text
     """
+    import mplhep
+    import matplotlib.pyplot as plt
+
     # we can add arbitrary parameters via the `general_settings` parameter to access them in the
     # plotting function. They are automatically parsed either to a bool, float, or string
     print(f"the example_param has been set to '{example_param}' (type: {type(example_param)})")
 
@@ -27,10 +27,10 @@ default_analysis: __cf_module_name__.config.analysis___cf_short_name_lc__.analys
 default_config: run2_2017_nano_v9
 default_dataset: st_tchannel_t_4f_powheg
 
-calibration_modules: columnflow.calibration.cms.{jets,met,tau}, __cf_module_name__.calibration.example
+calibration_modules: columnflow.calibration.cms.{jets,met,tau,egamma,muon}, __cf_module_name__.calibration.example
 selection_modules: columnflow.selection.empty, columnflow.selection.cms.{json_filter,met_filters}, __cf_module_name__.selection.example
 reduction_modules: columnflow.reduction.default, __cf_module_name__.reduction.example
-production_modules: columnflow.production.{categories,matching,normalization,processes}, columnflow.production.cms.{btag,electron,jet,matching,mc_weight,muon,pdf,pileup,scale,parton_shower,seeds}, __cf_module_name__.production.example
+production_modules: columnflow.production.{categories,matching,normalization,processes}, columnflow.production.cms.{btag,electron,jet,matching,mc_weight,muon,pdf,pileup,scale,parton_shower,seeds,gen_particles}, __cf_module_name__.production.example
 categorization_modules: __cf_module_name__.categorization.example
 hist_production_modules: columnflow.histogramming.default, __cf_module_name__.histogramming.example
 ml_modules: columnflow.ml, __cf_module_name__.ml.example
@@ -56,12 +56,16 @@ default_create_selection_hists: False
 # whether or not the ensure_proxy decorator should be skipped, even if used by task's run methods
 skip_ensure_proxy: False
 
+# the name of a sandbox to use for tasks in remote jobs initially (invoked with claw when set)
+default_remote_claw_sandbox: None
+
 # some remote workflow parameter defaults
 # (resources like memory and disk can also be set in [resources] with more granularity)
 htcondor_flavor: $CF_HTCONDOR_FLAVOR
 htcondor_share_software: False
 htcondor_memory: -1
 htcondor_disk: -1
+htcondor_runtime: 3h
 slurm_flavor: $CF_SLURM_FLAVOR
 slurm_partition: $CF_SLURM_PARTITION
 
@@ -70,6 +74,9 @@ chunked_io_chunk_size: 100000
 chunked_io_pool_size: 2
 chunked_io_debug: False
 
+# settings for merging parquet files in several locations
+merging_row_group_size: 50000
+
 # csv list of task families that inherit from ChunkedReaderMixin and whose output arrays should be
 # checked (raising an exception) for non-finite values before saving them to disk
 check_finite_output: cf.CalibrateEvents, cf.SelectEvents, cf.ReduceEvents, cf.ProduceColumns
@@ -98,8 +105,8 @@ lfn_sources: wlcg_fs_t2b_redirector, wlcg_fs_infn_redirector, wlcg_fs_global_red
 # output locations per task family
 # the key can consist of multiple underscore-separated parts, that can each be patterns or regexes
 # these parts are used for the lookup from within tasks and can contain (e.g.) the analysis name,
-# the config name, the task family, the dataset name, or the shift name
-# (see AnalysisTask.get_config_lookup_keys() - and subclasses - for the exact order)
+# the config name, the task family, the dataset name, or the shift name, for more info, see
+# https://columnflow.readthedocs.io/en/latest/user_guide/best_practices.html#selecting-output-locations
 # values can have the following format:
 # for local targets   : "local[, LOCAL_FS_NAME or STORE_PATH][, store_parts_modifier]"
 # for remote targets  : "wlcg[, WLCG_FS_NAME][, store_parts_modifier]"
@@ -108,22 +115,22 @@ lfn_sources: wlcg_fs_t2b_redirector, wlcg_fs_infn_redirector, wlcg_fs_global_red
 # the "store_parts_modifiers" can be the name of a function in the "store_parts_modifiers" aux dict
 # of the analysis instance, which is called with an output's store parts to modify them
 # example:
-; run3_2023__cf.CalibrateEvents__nomin*: local
-; cf.CalibrateEvents: wlcg
+; cfg_run3_2023__task_cf.CalibrateEvents__shift_nomin*: local
+; task_cf.CalibrateEvents: wlcg
 
 
 [versions]
 
 # default versions of specific tasks to pin
 # the key can consist of multiple underscore-separated parts, that can each be patterns or regexes
 # these parts are used for the lookup from within tasks and can contain (e.g.) the analysis name,
-# the config name, the task family, the dataset name, or the shift name
-# (see AnalysisTask.get_config_lookup_keys() - and subclasses - for the exact order)
+# the config name, the task family, the dataset name, or the shift name, for more info, see
+# https://columnflow.readthedocs.io/en/latest/user_guide/best_practices.html#pinned-versions-in-the-analysis-config-or-law-cfg-file
 # note:
 # this lookup is skipped if the lookup based on the config instance's auxiliary data succeeded
 # example:
-; run3_2023__cf.CalibrateEvents__nomin*: prod1
-; cf.CalibrateEvents: prod2
+; cfg_run3_2023__task_cf.CalibrateEvents__shift_nomin*: prod1
+; task_cf.CalibrateEvents: prod2
 
 
 [resources]
@@ -135,8 +142,8 @@ lfn_sources: wlcg_fs_t2b_redirector, wlcg_fs_infn_redirector, wlcg_fs_global_red
 # by the respective parameter instance at runtime
 # same as for [versions], the order of options is important as it defines the resolution order
 # example:
-; run3_2023__cf.CalibrateEvents__nomin*: htcondor_memory=5GB
-; run3_2023__cf.CalibrateEvents: htcondor_memory=2GB
+; cfg_run3_2023__task_cf.CalibrateEvents__shift_nomin*: htcondor_memory=5GB
+; cfg_run3_2023__task_cf.CalibrateEvents: htcondor_memory=2GB
 
 
 [job]
@@ -159,6 +166,12 @@ remote_lcg_setup_el9: /cvmfs/grid.cern.ch/alma9-ui-test/etc/profile.d/setup-alma
 remote_lcg_setup_force: False
 
 
+[target]
+
+# when removing target collections, use multi-threading
+collection_remove_threads: 2
+
+
 [local_fs]
 
 base: /
 
@@ -14,14 +14,16 @@
     apply_variable_settings,
     apply_process_settings,
 )
+from columnflow.types import TYPE_CHECKING
 
-hist = maybe_import("hist")
 np = maybe_import("numpy")
-mpl = maybe_import("matplotlib")
-plt = maybe_import("matplotlib.pyplot")
-mplhep = maybe_import("mplhep")
 od = maybe_import("order")
 
+# import hist, matplotlib, etc. for type checking only like this, and import them locally where they are used
+if TYPE_CHECKING:
+    hist = maybe_import("hist")
+    plt = maybe_import("matplotlib.pyplot")
+
 
 def my_plot1d_func(
     hists: OrderedDict[od.Process, hist.Hist],
@@ -45,6 +47,9 @@ def my_plot1d_func(
             --plot-function __cf_module_name__.plotting.example.my_plot1d_func \
             --general-settings example_param=some_text
     """
+    import mplhep
+    import matplotlib.pyplot as plt
+
     # we can add arbitrary parameters via the `general_settings` parameter to access them in the
     # plotting function. They are automatically parsed either to a bool, float, or string
     print(f"The example_param has been set to '{example_param}' (type: {type(example_param)})")
 
@@ -16,8 +16,10 @@
 
 np = maybe_import("numpy")
 ak = maybe_import("awkward")
-coffea = maybe_import("coffea")
-maybe_import("coffea.nanoevents.methods.nanoaod")
+
+# do not import coffea globally! Do this inside the function
+# coffea = maybe_import("coffea")
+# maybe_import("coffea.nanoevents.methods.nanoaod")
 
 
 @producer(
 
@@ -28,15 +28,21 @@
 from __cf_short_name_lc__.selection.stats import __cf_short_name_lc___increment_stats
 from __cf_short_name_lc__.selection.trigger import trigger_selection
 
+# only numpy and awkward are okay to import globally
 np = maybe_import("numpy")
 ak = maybe_import("awkward")
-coffea = maybe_import("coffea")
-maybe_import("coffea.nanoevents.methods.nanoaod")
+
+# do not import coffea globally! Do this inside the function
+# coffea = maybe_import("coffea")
+# maybe_import("coffea.nanoevents.methods.nanoaod")
 
 logger = law.logger.get_logger(__name__)
 
 
 def TetraVec(arr: ak.Array) -> ak.Array:
+    import coffea
+    import coffea.nanoevents.methods.nanoaod
+
     TetraVec = ak.zip({"pt": arr.pt, "eta": arr.eta, "phi": arr.phi, "mass": arr.mass},
     with_name="PtEtaPhiMLorentzVector",
     behavior=coffea.nanoevents.methods.vector.behavior)
 
@@ -10,7 +10,7 @@
 from columnflow.util import maybe_import, four_vec
 from columnflow.columnar_util import set_ak_column
 from columnflow.selection import Selector, SelectionResult, selector
-from columnflow.reduction.util import masked_sorted_indices
+from columnflow.columnar_util import sorted_indices_from_mask
 
 ak = maybe_import("awkward")
 
@@ -53,7 +53,7 @@ def muon_object(
         steps={},
         objects={
             "Muon": {
-                "Muon": masked_sorted_indices(mu_mask, muon.pt)
+                "Muon": sorted_indices_from_mask(mu_mask, muon.pt)
             }
         },
     )
@@ -108,7 +108,7 @@ def electron_object(
         steps={},
         objects={
             "Electron": {
-                "Electron": masked_sorted_indices(e_mask, electron.pt)
+                "Electron": sorted_indices_from_mask(e_mask, electron.pt)
             }
         },
     )
@@ -142,7 +142,7 @@ def jet_object(
         (dR_mask)
     )
 
-    jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)
+    jet_indices = sorted_indices_from_mask(jet_mask, events.Jet.pt)
     n_jets = ak.sum(jet_mask, axis=-1)
 
     return events, SelectionResult(
 
@@ -1,15 +1,25 @@
 #!/bin/sh
 action () {
+    # local variables
     local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
     local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
     local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
 
+    # check arguments
     # [ "$#" -eq 0 ] && {
     #     echo "ERROR: at least one file must be provided"
     #     return 1
     # }
 
-    cf_sandbox venv_columnar_dev python "${this_dir}/cf_inspect.py" "$@"
+    # determine the sandbox to use
+    local cf_inspect_sandbox="${CF_INSPECT_SANDBOX:-venv_columnar_dev}"
+
+    # run the inspection script, potentially switching to the sandbox if not already in it
+    if [ "${CF_VENV_NAME}" = "${cf_inspect_sandbox}" ]; then
+        python "${this_dir}/cf_inspect.py" "$@"
+    else
+        cf_sandbox "${cf_inspect_sandbox}" python "${this_dir}/cf_inspect.py" "$@"
+    fi
 }
 
 action "$@"
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@`
`10`	`10`	`from columnflow.util import maybe_import, four_vec`
`11`	`11`	`from columnflow.columnar_util import set_ak_column`
`12`	`12`	`from columnflow.selection import Selector, SelectionResult, selector`
`13`		`-from columnflow.reduction.util import masked_sorted_indices`
	`13`	`+from columnflow.columnar_util import sorted_indices_from_mask`
`14`	`14`
`15`	`15`	`ak = maybe_import("awkward")`
`16`	`16`
`@@ -53,7 +53,7 @@ def muon_object(`
`53`	`53`	`steps={},`
`54`	`54`	`objects={`
`55`	`55`	`"Muon": {`
`56`		`- "Muon": masked_sorted_indices(mu_mask, muon.pt)`
	`56`	`+ "Muon": sorted_indices_from_mask(mu_mask, muon.pt)`
`57`	`57`	`}`
`58`	`58`	`},`
`59`	`59`	`)`
`@@ -108,7 +108,7 @@ def electron_object(`
`108`	`108`	`steps={},`
`109`	`109`	`objects={`
`110`	`110`	`"Electron": {`
`111`		`- "Electron": masked_sorted_indices(e_mask, electron.pt)`
	`111`	`+ "Electron": sorted_indices_from_mask(e_mask, electron.pt)`
`112`	`112`	`}`
`113`	`113`	`},`
`114`	`114`	`)`
`@@ -142,7 +142,7 @@ def jet_object(`
`142`	`142`	`(dR_mask)`
`143`	`143`	`)`
`144`	`144`
`145`		`- jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)`
	`145`	`+ jet_indices = sorted_indices_from_mask(jet_mask, events.Jet.pt)`
`146`	`146`	`n_jets = ak.sum(jet_mask, axis=-1)`
`147`	`147`
`148`	`148`	`return events, SelectionResult(`