Skip to content
This repository was archived by the owner on Jan 31, 2025. It is now read-only.

Commit 016095c

Browse files
committed
Add PETSc event logging to generated code, reuses logic from PyOP2
1 parent b6a2e14 commit 016095c

File tree

4 files changed

+110
-66
lines changed

4 files changed

+110
-66
lines changed

pyop3/ir/lower.py

+20-29
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,15 @@
1717
from pyrsistent import freeze, pmap
1818

1919
from pyop3.array import HierarchicalArray
20-
from pyop3.array.harray import CalledMapVariable
2120
from pyop3.array.petsc import AbstractMat, Sparsity
2221
from pyop3.axtree import Axis, AxisComponent, AxisTree, AxisVariable, ContextFree
23-
from pyop3.axtree.tree import subst_layouts
2422
from pyop3.buffer import DistributedBuffer, NullBuffer, PackedBuffer
2523
from pyop3.config import config
2624
from pyop3.dtypes import IntType
27-
from pyop3.ir.transform import add_likwid_markers
28-
from pyop3.itree import (
29-
AffineSliceComponent,
30-
CalledMap,
31-
Index,
32-
IndexTree,
33-
LocalLoopIndex,
34-
LoopIndex,
35-
Map,
36-
Slice,
37-
Subset,
38-
TabulatedMapComponent,
39-
)
25+
from pyop3.ir.transform import with_likwid_markers, with_petsc_event
4026
from pyop3.itree.tree import (
41-
ContextFreeLoopIndex,
42-
IndexExpressionReplacer,
4327
LocalLoopIndexVariable,
4428
LoopIndexVariable,
45-
collect_shape_index_callback,
4629
)
4730
from pyop3.lang import (
4831
INC,
@@ -323,12 +306,12 @@ def set_temporary_shapes(self, shapes):
323306

324307

325308
class CodegenResult:
326-
def __init__(self, expr, ir, arg_replace_map):
309+
def __init__(self, expr, ir, arg_replace_map, *, compiler_parameters):
327310
self.expr = as_tuple(expr)
328311
self.ir = ir
329312
self.arg_replace_map = arg_replace_map
330313

331-
self._exec = compile_loopy(self.ir)
314+
self._exec = compile_loopy(self.ir, pyop3_compiler_parameters=compiler_parameters)
332315

333316
@cached_property
334317
def datamap(self):
@@ -401,6 +384,7 @@ class CompilerParameters:
401384
# NOTE: This sort of thing could have a default set from the config
402385
# dict (but do not use PYOP3_USE_LIKWID as that's a separate option).
403386
add_likwid_markers: bool = False
387+
add_petsc_event: bool = False
404388

405389

406390
def parse_compiler_parameters(compiler_parameters) -> CompilerParameters:
@@ -412,16 +396,18 @@ def parse_compiler_parameters(compiler_parameters) -> CompilerParameters:
412396

413397

414398
# prefer generate_code?
415-
def compile(expr: Instruction, name="mykernel", compiler_parameters=None):
399+
def compile(expr: Instruction, compiler_parameters=None):
416400
compiler_parameters = parse_compiler_parameters(compiler_parameters)
417401

418402
# preprocess expr before lowering
419403
from pyop3.transform import expand_implicit_pack_unpack, expand_loop_contexts
420404

405+
function_name = expr.name
406+
421407
cs_expr = expand_loop_contexts(expr)
422408
ctx = LoopyCodegenContext()
423-
for context, expr in cs_expr:
424-
expr = expand_implicit_pack_unpack(expr)
409+
for context, ex in cs_expr:
410+
ex = expand_implicit_pack_unpack(ex)
425411

426412
# add external loop indices as kernel arguments
427413
loop_indices = {}
@@ -441,7 +427,7 @@ def compile(expr: Instruction, name="mykernel", compiler_parameters=None):
441427
# FIXME currently assume that source and target exprs are the same, they are not!
442428
loop_indices[index] = (replace_map, replace_map)
443429

444-
for e in as_tuple(expr):
430+
for e in as_tuple(ex):
445431
# context manager?
446432
ctx.set_temporary_shapes(_collect_temporary_shapes(e))
447433
_compile(e, loop_indices, ctx)
@@ -480,24 +466,29 @@ def compile(expr: Instruction, name="mykernel", compiler_parameters=None):
480466
ctx.domains,
481467
ctx.instructions,
482468
ctx.arguments,
483-
name=name,
469+
name=function_name,
484470
target=LOOPY_TARGET,
485471
lang_version=LOOPY_LANG_VERSION,
486472
preambles=preambles,
487473
# options=lp.Options(check_dep_resolution=False),
488474
)
489475

476+
entrypoint = translation_unit.default_entrypoint
490477
if compiler_parameters.add_likwid_markers:
491-
translation_unit = add_likwid_markers(translation_unit)
478+
entrypoint = with_likwid_markers(entrypoint)
479+
if compiler_parameters.add_petsc_event:
480+
entrypoint = with_petsc_event(entrypoint)
481+
translation_unit = translation_unit.with_kernel(entrypoint)
492482

493-
tu = lp.merge((translation_unit, *ctx.subkernels))
483+
translation_unit = lp.merge((translation_unit, *ctx.subkernels))
494484

495485
# add callables
496486
# tu = lp.register_callable(tu, "bsearch", BinarySearchCallable())
497487

498-
tu = tu.with_entrypoints(name)
488+
# needed?
489+
translation_unit = translation_unit.with_entrypoints(entrypoint.name)
499490

500-
return CodegenResult(expr, tu, ctx.kernel_to_actual_rename_map)
491+
return CodegenResult(expr, translation_unit, ctx.kernel_to_actual_rename_map, compiler_parameters=compiler_parameters)
501492

502493

503494
# put into a class in transform.py?

pyop3/ir/transform.py

+39-7
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,55 @@
1+
import textwrap
2+
13
import loopy as lp
24

35

4-
def add_likwid_markers(knl):
6+
def with_likwid_markers(knl):
    """Return a copy of ``knl`` wrapped in a LIKWID marker region.

    The region is named after the kernel (``knl.name``) and is registered
    through ``pylikwid`` before the instrumented kernel is built.

    See https://github.com/RRZE-HPC/likwid/wiki/TutorialMarkerC
    """
    # Imported here rather than at module level, as in the original code
    # (presumably so pylikwid stays an optional dependency -- TODO confirm).
    import pylikwid

    region = knl.name
    pylikwid.markerregisterregion(region)

    # C statements that open and close the marker region.
    begin = lp.CInstruction(
        (),
        f'LIKWID_MARKER_START("{region}");',
        id="likwid_start",
    )
    end = lp.CInstruction(
        (),
        f'LIKWID_MARKER_STOP("{region}");',
        id="likwid_stop",
    )

    # Header needed for the marker macros used above.
    likwid_preambles = [("99_likwid", "#include <likwid-marker.h>")]

    return _with_region_markers(knl, begin, end, likwid_preambles)
20+
21+
22+
def with_petsc_event(knl):
    """Return a copy of ``knl`` wrapped in a PETSc log event.

    The generated C code declares a global ``PetscLogEvent id_<kernel name>``
    initialised to ``-1`` and brackets the kernel body with
    ``PetscLogEventBegin``/``PetscLogEventEnd`` calls on that variable.
    The ``-1`` is only a placeholder so the code compiles; per the comment in
    the emitted preamble, the real event id is expected to be written into the
    compiled object afterwards.
    """
    event_name = knl.name

    # Preamble text: PETSc logging header plus the placeholder event id.
    declaration = textwrap.dedent(f"""
        #include <petsclog.h>

        // Prepare a dummy event so that things compile. This is overwridden using
        // the object file.
        PetscLogEvent id_{event_name} = -1;
    """)
    petsc_preambles = [("99_petsc", declaration)]

    # C statements that open and close the event around the kernel body.
    begin = lp.CInstruction(
        (),
        f"PetscLogEventBegin(id_{event_name}, 0, 0, 0, 0);",
        id="petsc_log_begin",
    )
    end = lp.CInstruction(
        (),
        f"PetscLogEventEnd(id_{event_name}, 0, 0, 0, 0);",
        id="petsc_log_end",
    )

    return _with_region_markers(knl, begin, end, petsc_preambles)
43+
44+
45+
def _with_region_markers(knl, start_insn, stop_insn, preambles):
46+
preambles = knl.preambles + tuple(preambles)
47+
48+
assert start_insn.id is not None
1649
insns = (
17-
[start_insn]
18-
+ [insn.copy(depends_on=insn.depends_on | {"likwid_start"}) for insn in knl.instructions]
50+
start_insn,
51+
*(insn.copy(depends_on=insn.depends_on | {start_insn.id}) for insn in knl.instructions),
52+
stop_insn.copy(depends_on=frozenset(insn.id for insn in knl.instructions)),
1953
)
20-
stop_insn = lp.CInstruction((), f"LIKWID_MARKER_STOP(\"{marker_name}\");", id="likwid_stop", depends_on=frozenset(insn.id for insn in insns))
21-
insns.append(stop_insn)
2254

2355
return knl.copy(preambles=preambles, instructions=insns)

pyop3/lang.py

+36-24
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,11 @@ def datamap(self):
116116
# pass
117117

118118

119+
_DEFAULT_LOOP_NAME = "pyop3_loop"
120+
121+
119122
class Loop(Instruction):
120-
fields = Instruction.fields | {"index", "statements", "compiler_parameters"}
123+
fields = Instruction.fields | {"index", "statements", "compiler_parameters", "name"}
121124

122125
# doubt that I need an ID here
123126
id_generator = pytools.UniqueNameGenerator()
@@ -127,33 +130,35 @@ def __init__(
127130
index: LoopIndex,
128131
statements: Iterable[Instruction],
129132
*,
133+
name: str = _DEFAULT_LOOP_NAME,
130134
compiler_parameters=None,
131135
**kwargs,
132136
):
133137
super().__init__(**kwargs)
134138
self.index = index
135139
self.statements = as_tuple(statements)
140+
self.name = name
136141
self.compiler_parameters = compiler_parameters
137142

138143
def __call__(self, **kwargs):
139144
# TODO just parse into ContextAwareLoop and call that
140145
from pyop3.ir.lower import compile
141146
from pyop3.itree.tree import partition_iterset
142147

148+
code = compile(self, compiler_parameters=self.compiler_parameters)
149+
143150
if self.is_parallel:
144151
# FIXME: The partitioning code does not seem to always run properly
145152
# so for now do all the transfers in advance.
146153
# interleave computation and communication
147-
# new_index, (icore, iroot, ileaf) = partition_iterset(
148-
# self.index, [a for a, _ in self.function_arguments]
149-
# )
154+
new_index, (icore, iroot, ileaf) = partition_iterset(
155+
self.index, [a for a, _ in self.function_arguments]
156+
)
150157
#
151158
# assert self.index.id == new_index.id
152159
#
153160
# # substitute subsets into loopexpr, should maybe be done in partition_iterset
154161
# parallel_loop = self.copy(index=new_index)
155-
# code = compile(parallel_loop)
156-
code = compile(self, compiler_parameters=self.compiler_parameters)
157162

158163
# interleave communication and computation
159164
initializers, reductions, broadcasts = self._array_updates()
@@ -162,40 +167,43 @@ def __call__(self, **kwargs):
162167
init()
163168

164169
# replace the parallel axis subset with one for the specific indices here
165-
# extent = just_one(icore.axes.root.components).count
166-
# core_kwargs = merge_dicts(
167-
# [kwargs, {icore.name: icore, extent.name: extent}]
168-
# )
169-
# code(**core_kwargs)
170+
extent = just_one(icore.axes.root.components).count
171+
core_kwargs = merge_dicts(
172+
[kwargs, {icore.name: icore, extent.name: extent}]
173+
)
174+
175+
with PETSc.Log.Event(f"compute_{self.name}_core"):
176+
code(**core_kwargs)
170177

171178
# await reductions
172179
for red in reductions:
173180
red()
174181

175182
# roots
176183
# replace the parallel axis subset with one for the specific indices here
177-
# root_extent = just_one(iroot.axes.root.components).count
178-
# root_kwargs = merge_dicts(
179-
# [kwargs, {icore.name: iroot, extent.name: root_extent}]
180-
# )
181-
# code(**root_kwargs)
184+
root_extent = just_one(iroot.axes.root.components).count
185+
root_kwargs = merge_dicts(
186+
[kwargs, {icore.name: iroot, extent.name: root_extent}]
187+
)
188+
with PETSc.Log.Event(f"compute_{self.name}_root"):
189+
code(**root_kwargs)
182190

183191
# await broadcasts
184192
for broadcast in broadcasts:
185193
broadcast()
186194

187195
# leaves
188-
# leaf_extent = just_one(ileaf.axes.root.components).count
189-
# leaf_kwargs = merge_dicts(
190-
# [kwargs, {icore.name: ileaf, extent.name: leaf_extent}]
191-
# )
192-
# code(**leaf_kwargs)
193-
194-
code(**kwargs)
196+
leaf_extent = just_one(ileaf.axes.root.components).count
197+
leaf_kwargs = merge_dicts(
198+
[kwargs, {icore.name: ileaf, extent.name: leaf_extent}]
199+
)
200+
with PETSc.Log.Event(f"compute_{self.name}_leaf"):
201+
code(**leaf_kwargs)
195202

196203
# also may need to eagerly assemble Mats, or be clever and spike the accessors?
197204
else:
198-
compile(self, compiler_parameters=self.compiler_parameters)(**kwargs)
205+
with PETSc.Log.Event(f"compute_{self.name}_serial"):
206+
code(**kwargs)
199207

200208
@cached_property
201209
def loopy_code(self):
@@ -345,6 +353,10 @@ def _init_nil():
345353

346354
return tuple(initializers), tuple(reductions), tuple(broadcasts)
347355

356+
@cached_property
357+
def datamap(self):
358+
return self.index.datamap | merge_dicts(stmt.datamap for stmt in self.statements)
359+
348360

349361
class ContextAwareLoop(ContextAwareInstruction):
350362
fields = Instruction.fields | {"index", "statements"}

pyop3/target.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555

5656
@mpi.collective
57-
def compile_loopy(kernel, **kwargs):
57+
def compile_loopy(translation_unit, *, pyop3_compiler_parameters, **kwargs):
5858
"""Build a shared library and return a function pointer from it.
5959
6060
:arg jitmodule: The JIT Module which can generate the code to compile, or
@@ -71,8 +71,8 @@ def compile_loopy(kernel, **kwargs):
7171
:kwarg comm: Optional communicator to compile the code on (only
7272
rank 0 compiles code) (defaults to pyop2.mpi.COMM_WORLD).
7373
"""
74-
code = lp.generate_code_v2(kernel).device_code()
75-
argtypes = [ctypes.c_voidp for _ in kernel.default_entrypoint.args]
74+
code = lp.generate_code_v2(translation_unit).device_code()
75+
argtypes = [ctypes.c_voidp for _ in translation_unit.default_entrypoint.args]
7676
restype = None
7777

7878
# ideally move this logic somewhere else
@@ -88,21 +88,30 @@ def compile_loopy(kernel, **kwargs):
8888
# + tuple(self.local_kernel.ldargs)
8989
)
9090

91+
# NOTE: no - instead of this inspect the compiler parameters!!!
9192
# TODO: Make some sort of function in config.py
9293
if "LIKWID_MODE" in os.environ:
9394
cppargs += ("-DLIKWID_PERFMON",)
9495
ldargs += ("-llikwid",)
9596

96-
return compile_c(
97+
func, lib = compile_c(
9798
code,
98-
kernel.default_entrypoint.name,
99+
translation_unit.default_entrypoint.name,
99100
argtypes,
100101
restype,
101102
extra_compiler_flags=cppargs,
102103
extra_linker_flags=ldargs,
103104
**kwargs,
104105
)
105106

107+
if pyop3_compiler_parameters.add_petsc_event:
108+
# Create the event in python and then set in the shared library to avoid
109+
# allocating memory over and over again in the C kernel.
110+
event_name = translation_unit.default_entrypoint.name
111+
ctypes.c_int.in_dll(lib, f"id_{event_name}").value = PETSc.Log.Event(event_name).id
112+
113+
return func
114+
106115

107116
def compile_c(code: str, name, argtypes, restype, **kwargs):
108117
compiler = _compiler if _compiler else sniff_compiler(config["cc"])
@@ -511,7 +520,7 @@ def compile_library(self, code: str, name: str, argtypes, restype):
511520
fn = getattr(dll, name)
512521
fn.argtypes = argtypes
513522
fn.restype = restype
514-
return fn
523+
return fn, dll
515524

516525

517526
class MacClangCompiler(Compiler):

0 commit comments

Comments
 (0)