Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py
@@ -7,22 +7,20 @@
from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, _NoVerbosity
from Deeploy.TilingExtension.AsyncDma import AsyncDma
from Deeploy.TilingExtension.CodeTransformationPasses.DoubleBufferingTilingCodeGeneration import \
-    DoubleBufferingTilingCodeGeneration
+    DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn
from Deeploy.TilingExtension.CodeTransformationPasses.SingleBufferingTilingCodeGeneration import \
-    SingleBufferingTilingCodeGeneration
-from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import DoubleBufferingTilingMixIn, \
-    ProfilingDoubleBufferingTilingMixIn, ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingMixIn
+    ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingCodeGeneration


-class PULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration, SingleBufferingTilingMixIn):
+class PULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration):
    pass


class ProfilingPULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration, ProfilingSingleBufferingTilingMixIn):
    pass
Comment on lines 19 to 20
⚠️ Potential issue | 🟠 Major

Fix MRO so profiling mixins override base methods.

Place Profiling*MixIn before the CodeGeneration base.

-class ProfilingPULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration, ProfilingSingleBufferingTilingMixIn):
+class ProfilingPULPClusterTilingGenerationSB(ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingCodeGeneration):
     pass
@@
-class ProfilingPULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn):
+class ProfilingPULPClusterTilingGenerationDB(ProfilingDoubleBufferingTilingMixIn, DoubleBufferingTilingCodeGeneration):
     pass

Also applies to: 27-28

🤖 Prompt for AI Agents
In Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py around
lines 19-20 (and similarly lines 27-28), the profiling mixin is declared after
the code-generation base which prevents the mixin from overriding base methods
due to MRO; swap the inheritance order so the Profiling*MixIn comes before the
CodeGeneration base (e.g., class
ProfilingPULPClusterTilingGenerationSB(ProfilingSingleBufferingTilingMixIn,
SingleBufferingTilingCodeGeneration): pass) and apply the same swap to the other
affected class definition so profiling methods take precedence.
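
To make the MRO point concrete, here is a minimal, self-contained Python sketch with hypothetical CodeGenBase and ProfilingMixIn classes (not Deeploy's actual classes). With the mixin listed last, attribute lookup finds the base's method first and the profiling wrapper never runs; with the mixin listed first, its override wins and can still delegate to the base via super().

class CodeGenBase:

    def generate(self) -> str:
        return "base codegen"


class ProfilingMixIn:

    def generate(self) -> str:
        # Wrap whatever the next class in the MRO produces with profiling markers.
        return "profile_start; " + super().generate() + "; profile_stop"


class MixinLast(CodeGenBase, ProfilingMixIn):
    pass


class MixinFirst(ProfilingMixIn, CodeGenBase):
    pass


print(MixinLast().generate())   # base codegen (the mixin is bypassed)
print(MixinFirst().generate())  # profile_start; base codegen; profile_stop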



-class PULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration, DoubleBufferingTilingMixIn):
+class PULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration):
    pass


Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPL3Tiling.py
@@ -7,22 +7,20 @@
from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, _NoVerbosity
from Deeploy.TilingExtension.AsyncDma import AsyncDma
from Deeploy.TilingExtension.CodeTransformationPasses.DoubleBufferingTilingCodeGeneration import \
-    DoubleBufferingTilingCodeGeneration
+    DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn
from Deeploy.TilingExtension.CodeTransformationPasses.SingleBufferingTilingCodeGeneration import \
-    SingleBufferingTilingCodeGeneration
-from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import DoubleBufferingTilingMixIn, \
-    ProfilingDoubleBufferingTilingMixIn, ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingMixIn
+    ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingCodeGeneration


-class PULPL3TilingGenerationSB(SingleBufferingTilingCodeGeneration, SingleBufferingTilingMixIn):
+class PULPL3TilingGenerationSB(SingleBufferingTilingCodeGeneration):
    pass


class ProfilingPULPL3TilingGenerationSB(SingleBufferingTilingCodeGeneration, ProfilingSingleBufferingTilingMixIn):
    pass
Comment on lines 19 to 20
⚠️ Potential issue | 🟠 Major

Fix MRO so profiling mixins override base methods.

Reorder bases: mixin first.

-class ProfilingPULPL3TilingGenerationSB(SingleBufferingTilingCodeGeneration, ProfilingSingleBufferingTilingMixIn):
+class ProfilingPULPL3TilingGenerationSB(ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingCodeGeneration):
     pass
@@
-class ProfilingPULPL3TilingGenerationDB(DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn):
+class ProfilingPULPL3TilingGenerationDB(ProfilingDoubleBufferingTilingMixIn, DoubleBufferingTilingCodeGeneration):
     pass

Also applies to: 27-28

🤖 Prompt for AI Agents
In Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPL3Tiling.py around
lines 19-20 and 27-28 the profiling mixin(s) are listed after the concrete
SingleBufferingTilingCodeGeneration base, causing the MRO to pick base methods
over the mixin overrides; reorder the base classes so the
ProfilingSingleBufferingTilingMixIn (and any other profiling mixins) appear
first in each class definition (mixin before
SingleBufferingTilingCodeGeneration) so the mixin methods take precedence.
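
As a quick sanity check of the reordering, an illustrative snippet (it assumes the Deeploy package is importable and that ProfilingPULPL3TilingGenerationSB has been redefined with the mixin first, as suggested above) is to print the resolution order and confirm the profiling mixin precedes the code-generation base:

from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import \
    ProfilingPULPL3TilingGenerationSB

# With the suggested ordering, the mixin appears before the code-generation
# base, so its overrides are found first during attribute lookup.
for cls in ProfilingPULPL3TilingGenerationSB.__mro__:
    print(cls.__name__)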



-class PULPL3TilingGenerationDB(DoubleBufferingTilingCodeGeneration, DoubleBufferingTilingMixIn):
+class PULPL3TilingGenerationDB(DoubleBufferingTilingCodeGeneration):
    pass


Deeploy/Targets/PULPOpen/DMA/L3Dma.py (11 changes: 9 additions & 2 deletions)
@@ -12,9 +12,16 @@

class L3DmaFuture(Future):

-    _initTemplate = NodeTemplate("pi_cl_ram_req_t ${name};")
+    _initTemplate = NodeTemplate("pi_cl_ram_req_t ${name} = {0};")

    _deinitTemplate = NodeTemplate("")
-    _waitTemplate = NodeTemplate("pi_cl_ram_copy_wait(&${name});")
+
+    _allocTemplate = NodeTemplate("")
+
+    _waitTemplate = NodeTemplate("""
+    if (${name}.size != 0) {
+        pi_cl_ram_copy_wait(&${name});
+    }""")


class L3Dma(AsyncDma):
Deeploy/Targets/PULPOpen/DMA/MchanDma.py (14 changes: 9 additions & 5 deletions)
@@ -6,14 +6,18 @@
from typing import Dict, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation, VariableBuffer
-from Deeploy.TilingExtension.AsyncDma import AsyncDma, DmaDirection, Future, TensorGroupWaitingStrategy
+from Deeploy.TilingExtension.AsyncDma import AsyncDma, DirectionWaitingStrategy, DmaDirection, Future


class MchanChannelFuture(Future):

-    _initTemplate = NodeTemplate("uint32_t ${name} = mchan_channel_alloc();")
-    _deinitTemplate = NodeTemplate("mchan_channel_free(${name});")
-    _waitTemplate = NodeTemplate("mchan_channel_wait(${name});")
+    _initTemplate = NodeTemplate("uint32_t ${name} = (uint32_t) -1;")
+
+    _deinitTemplate = NodeTemplate("")
+
+    _allocTemplate = NodeTemplate("${name} = mchan_channel_alloc();")
+
+    _waitTemplate = NodeTemplate("mchan_channel_wait(${name});\nmchan_channel_free(${name});")


class MchanDma(AsyncDma):
@@ -22,7 +26,7 @@ class MchanDma(AsyncDma):
1: NodeTemplate("mchan_transfer_1d(${cmd}, ${loc}, ${ext});"),
2: NodeTemplate("mchan_transfer_2d_ext_strided(${cmd}, ${loc}, ${ext}, ${size_1d}, ${stride_2d});"),
}
_waitingStrategy = TensorGroupWaitingStrategy(MchanChannelFuture, "channel_id")
_waitingStrategy = DirectionWaitingStrategy(MchanChannelFuture, "channel")

def __init__(self, transferTemplates: Dict[int, NodeTemplate] = _transferTemplates) -> None:
super().__init__(transferTemplates)
Deeploy/Targets/Snitch/Bindings.py (3 changes: 1 addition & 2 deletions)
@@ -14,7 +14,7 @@
from Deeploy.Targets.Generic.Templates import iNoNormTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, GEMMChecker, RQAddChecker, SoftmaxChecker, iNoNormChecker
from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling, SnitchCoreFilterPass, \
-    SnitchProfileExecutionBlockPass, SnitchSynchCoresPass
+    SnitchSynchCoresPass
from Deeploy.Targets.Snitch.DMA.SnitchDma import SnitchDma
from Deeploy.Targets.Snitch.Templates import AddTemplate, FloatGemmTemplate, RQAddTemplate, iSoftmaxTemplate
from Deeploy.Targets.Snitch.Templates.FloatSoftmaxTemplate import FloatSoftmax_Template
@@ -37,7 +37,6 @@

TiledTransformer = CodeTransformation([
    SnitchCoreFilterPass("compute"),
-    SnitchProfileExecutionBlockPass(),
    TilingVariableReplacement("L1"),
    TilingCallClosure(writeback = False),
    SnitchSynchCoresPass(),
@@ -4,38 +4,55 @@

from typing import Tuple

-from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, _NoVerbosity
+from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, \
+    NodeTemplate, _NoVerbosity
from Deeploy.TilingExtension.AsyncDma import AsyncDma
from Deeploy.TilingExtension.CodeTransformationPasses.DoubleBufferingTilingCodeGeneration import \
-    DoubleBufferingTilingCodeGeneration
+    DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn
from Deeploy.TilingExtension.CodeTransformationPasses.SingleBufferingTilingCodeGeneration import \
-    SingleBufferingTilingCodeGeneration
-from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import DoubleBufferingTilingMixIn, \
-    SingleBufferingTilingMixIn
+    ProfilingSingleBufferingTilingMixIn, SingleBufferingTilingCodeGeneration


-class SnitchClusterTilingSB(SingleBufferingTilingCodeGeneration, SingleBufferingTilingMixIn):
+class SnitchClusterTilingSB(SingleBufferingTilingCodeGeneration):
    pass


-class SnitchClusterTilingDB(DoubleBufferingTilingCodeGeneration, DoubleBufferingTilingMixIn):
+class SnitchClusterTilingDB(DoubleBufferingTilingCodeGeneration):
    pass


+class ProfilingSnitchClusterTilingSB(SingleBufferingTilingCodeGeneration, ProfilingSingleBufferingTilingMixIn):
+    _printCycleDifference = NodeTemplate(r"""
+printf("%s%u][Core %d] %s%u%s", ${prefixStr}, ${profileIdxVar}, snrt_global_core_idx(), "${flavorStr}", \
+${measurementsEnd}[${profileIdxVar}] - ${measurementsStart}[${profileIdxVar}], ${suffixStr});
+""")
+
+
+class ProfilingSnitchClusterTilingDB(DoubleBufferingTilingCodeGeneration, ProfilingDoubleBufferingTilingMixIn):
+    _printCycleDifference = NodeTemplate(r"""
+printf("%s%u][Core %d] %s%u%s", ${prefixStr}, ${profileIdxVar}, snrt_global_core_idx(), "${flavorStr}", \
+${measurementsEnd}[${profileIdxVar}] - ${measurementsStart}[${profileIdxVar}], ${suffixStr});
+""")
+
+
class SnitchClusterTiling(CodeTransformationPass):

    def __init__(self, externalMemory: str, localMemory: str, dma: AsyncDma):
        self.SB = SnitchClusterTilingSB(externalMemory, localMemory, dma)
+        self.profilingSB = ProfilingSnitchClusterTilingSB(externalMemory, localMemory, dma)
+
        self.DB = SnitchClusterTilingDB(externalMemory, localMemory, dma)
+        self.profilingDB = ProfilingSnitchClusterTilingDB(externalMemory, localMemory, dma)

    def apply(self,
              ctxt: NetworkContext,
              executionBlock: ExecutionBlock,
              name: str,
              verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
        if verbose.tilingProfiling:
-            raise NotImplementedError("Profiling not implemented for L2")
-
-        ctxt, executionBlock = self.SB.apply(ctxt, executionBlock, name)
-        ctxt, executionBlock = self.DB.apply(ctxt, executionBlock, name)
+            ctxt, executionBlock = self.profilingSB.apply(ctxt, executionBlock, name)
+            ctxt, executionBlock = self.profilingDB.apply(ctxt, executionBlock, name)
+        else:
+            ctxt, executionBlock = self.SB.apply(ctxt, executionBlock, name)
+            ctxt, executionBlock = self.DB.apply(ctxt, executionBlock, name)
        return ctxt, executionBlock
Deeploy/Targets/Snitch/DMA/SnitchDma.py (26 changes: 18 additions & 8 deletions)
@@ -5,31 +5,41 @@
from typing import Dict, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation, VariableBuffer
-from Deeploy.TilingExtension.AsyncDma import AsyncDma, DmaDirection, Future, TensorGroupWaitingStrategy
+from Deeploy.TilingExtension.AsyncDma import AsyncDma, DmaDirection, Future, PerTensorWaitingStrategy


class SnitchBarrierFuture(Future):
    _initTemplate = NodeTemplate("")
    _deinitTemplate = NodeTemplate("")
+    _allocTemplate = NodeTemplate("")
    _waitTemplate = NodeTemplate("if (snrt_is_dm_core()) snrt_dma_wait_all();")


# LMACAN: TODO: Add single transfer waiting
class SnitchFuture(Future):
-    _initTemplate = NodeTemplate("uint16_t ${name};")
+    _initTemplate = NodeTemplate("snrt_dma_txid_t ${name} = (snrt_dma_txid_t) -1;")
+
    _deinitTemplate = NodeTemplate("")
-    _waitTemplate = NodeTemplate("if (snrt_is_dm_core()) snrt_dma_wait(${name});")
+
+    _allocTemplate = NodeTemplate("")
+
+    _waitTemplate = NodeTemplate(
+        "if ( (${name} != ( (snrt_dma_txid_t) -1) ) && snrt_is_dm_core() ) snrt_dma_wait(${name});")


class SnitchDma(AsyncDma):

    _transferTemplates = {
        2:
-            NodeTemplate(
-                "if (snrt_is_dm_core()) snrt_dma_start_2d(${dest}, ${src}, ${size}, ${stride_dest}, ${stride_src}, ${repeat});"
-            ),
+            NodeTemplate("""
+            if (snrt_is_dm_core()) {
+                ${future} = snrt_dma_start_2d(${dest}, ${src}, ${size}, ${stride_dest}, ${stride_src}, ${repeat});
+                // WIESEP: Hack as otherwise the last commited DMA transaction ID can never be resolved.
+                snrt_dma_start_2d(${dest}, ${dest}, 1, 0, 0, 0);
+            }
+            """),
    }
-    _waitingStrategy = TensorGroupWaitingStrategy(SnitchBarrierFuture, "")
+    _waitingStrategy = PerTensorWaitingStrategy(SnitchFuture)

Comment on lines 32 to 43
⚠️ Potential issue | 🟡 Minor

Annotate class attributes with ClassVar to satisfy RUF012 and signal intent.

Mark mutable class attributes as ClassVar.

Apply:

-from typing import Dict, Tuple
+from typing import Dict, Tuple, ClassVar
+from Deeploy.TilingExtension.AsyncDma import AsyncDma, DmaDirection, Future, PerTensorWaitingStrategy, AsyncDmaWaitingStrategy
@@
-class SnitchDma(AsyncDma):
+class SnitchDma(AsyncDma):
 
-    _transferTemplates = {
+    _transferTemplates: ClassVar[Dict[int, NodeTemplate]] = {
@@
-    _waitingStrategy = PerTensorWaitingStrategy(SnitchFuture)
+    _waitingStrategy: ClassVar[AsyncDmaWaitingStrategy] = PerTensorWaitingStrategy(SnitchFuture)

Based on static analysis hints

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from typing import Dict, Tuple, ClassVar

from Deeploy.TilingExtension.AsyncDma import (
    AsyncDma,
    DmaDirection,
    Future,
    PerTensorWaitingStrategy,
    AsyncDmaWaitingStrategy,
)


class SnitchDma(AsyncDma):

    _transferTemplates: ClassVar[Dict[int, NodeTemplate]] = {
        2:
            NodeTemplate("""
            if (snrt_is_dm_core()) {
                ${future} = snrt_dma_start_2d(${dest}, ${src}, ${size}, ${stride_dest}, ${stride_src}, ${repeat});
                // WIESEP: Hack as otherwise the last commited DMA transaction ID can never be resolved.
                snrt_dma_start_2d(${dest}, ${dest}, 1, 0, 0, 0);
            }
            """),
    }
    _waitingStrategy: ClassVar[AsyncDmaWaitingStrategy] = PerTensorWaitingStrategy(SnitchFuture)
🧰 Tools
🪛 Ruff (0.13.3)

32-41: Mutable class attributes should be annotated with typing.ClassVar

(RUF012)

🤖 Prompt for AI Agents
In Deeploy/Targets/Snitch/DMA/SnitchDma.py around lines 32 to 43, the mutable
class attributes _transferTemplates and _waitingStrategy need explicit ClassVar
typing to satisfy RUF012 and signal they are class-level, not instance-level;
import ClassVar from typing (or typing_extensions if needed) and change their
annotations to e.g. _transferTemplates: ClassVar[Dict[int, NodeTemplate]] =
{...} and _waitingStrategy: ClassVar[PerTensorWaitingStrategy] =
PerTensorWaitingStrategy(SnitchFuture), ensuring imports for any referenced
types are available.
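
For background on the Ruff rule itself, the toy snippet below (generic names, unrelated to Deeploy) illustrates what the ClassVar annotation communicates: the attribute is a single class-level value shared by all instances, and type checkers will flag attempts to rebind it through an instance.

from typing import ClassVar, Dict


class Registry:
    # A shared, class-level table. ClassVar tells type checkers this is not an
    # instance attribute, so rebinding it via `self` is reported as an error.
    _templates: ClassVar[Dict[int, str]] = {1: "1d", 2: "2d"}

    def lookup(self, rank: int) -> str:
        return self._templates[rank]


a = Registry()
b = Registry()
assert a._templates is b._templates  # both instances share the same dict
print(a.lookup(2))  # prints "2d"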

    def __init__(self, transferTemplates: Dict[int, NodeTemplate] = _transferTemplates) -> None:
        super().__init__(transferTemplates)
@@ -43,13 +53,13 @@ def checkTransfer(self, ctxt: NetworkContext, externalBuffer: VariableBuffer, lo
    def transferOpRepr(self, externalBuffer: VariableBuffer, localBuffer: VariableBuffer, shape: Tuple[int, ...],
                       strideExt: Tuple[int, ...], strideLoc: Tuple[int, ...], direction: DmaDirection,
                       future: Future) -> OperatorRepresentation:
-        _ = future
        operatorRepresentation: OperatorRepresentation = {
            "dest": localBuffer.name if direction == "ExternalToLocal" else externalBuffer.name,
            "src": externalBuffer.name if direction == "ExternalToLocal" else localBuffer.name,
            "repeat": shape[0],
            "size": shape[1],
            "stride_dest": strideLoc[0] if direction == "ExternalToLocal" else strideExt[0],
            "stride_src": strideExt[0] if direction == "ExternalToLocal" else strideLoc[0],
+            "future": future.name
        }
        return operatorRepresentation