[flang-commits] [flang] [flang][cuda] Add instrustions for tma_bulk_s2g (PR #165480)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Oct 28 14:13:46 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/165480
None
>From 008cc8823139e0576d64de7e06c57e022d130936 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 28 Oct 2025 14:12:55 -0700
Subject: [PATCH] [flang][cuda] Add instrustions for tma_bulk_s2g
---
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 5 +++++
flang/test/Lower/CUDA/cuda-device-proc.cuf | 2 ++
2 files changed, 7 insertions(+)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0d225532f2460..6e8b411644758 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -9287,6 +9287,11 @@ void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
mlir::NVVM::NVVMMemorySpace::Global);
mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+
+ mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {},
+ "cp.async.bulk.commit_group", {});
+ mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc,
+ builder.getI32IntegerAttr(0), {});
}
// TMA_BULK_WAIT_GROUP (CUDA)
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 99b1a2fc0cbf7..8bf506b0518ed 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -479,6 +479,8 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_bulk_s2g
! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
attributes(device) subroutine testAtomicCasLoop(aa, n)
integer :: a
More information about the flang-commits
mailing list