@@ -406,8 +406,10 @@ class InsertGPUAllocsPass final
406
406
auto newAlloc = builder.create <mlir::memref::AllocOp>(
407
407
loc, alloc.getType (), alloc.getDynamicSizes (),
408
408
alloc.getSymbolOperands ());
409
- builder.create <mlir::memref::CopyOp>(loc, allocResult,
410
- newAlloc.getResult ());
409
+ builder.create <mlir::gpu::MemcpyOp>(
410
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
411
+ /* asyncDependencies*/ std::nullopt, newAlloc.getResult (),
412
+ allocResult);
411
413
use.set (newAlloc.getResult ());
412
414
}
413
415
}
@@ -456,8 +458,9 @@ class InsertGPUAllocsPass final
456
458
/* symbolOperands*/ std::nullopt, hostShared);
457
459
auto allocResult = gpuAlloc.getResult (0 );
458
460
if (access .hostWrite && access .deviceRead ) {
459
- auto copy =
460
- builder.create <mlir::memref::CopyOp>(loc, op, allocResult);
461
+ auto copy = builder.create <mlir::gpu::MemcpyOp>(
462
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
463
+ /* asyncDependencies*/ std::nullopt, allocResult, op);
461
464
filter.insert (copy);
462
465
}
463
466
@@ -476,7 +479,9 @@ class InsertGPUAllocsPass final
476
479
op.replaceAllUsesExcept (allocResult, filter);
477
480
builder.setInsertionPoint (term);
478
481
if (access .hostRead && access .deviceWrite ) {
479
- builder.create <mlir::memref::CopyOp>(loc, allocResult, op);
482
+ builder.create <mlir::gpu::MemcpyOp>(
483
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
484
+ /* asyncDependencies*/ std::nullopt, op, allocResult);
480
485
}
481
486
builder.create <mlir::gpu::DeallocOp>(loc, std::nullopt, allocResult);
482
487
}
0 commit comments