[flang-commits] [flang] [mlir] [MLIR][NVVM] Update mbarrier Ops to use AnyTypeOf[] (2/n) (PR #165993)
via flang-commits
flang-commits at lists.llvm.org
Sat Nov 1 07:10:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
@llvm/pr-subscribers-mlir
Author: Durgadoss R (durga4github)
<details>
<summary>Changes</summary>
This is a follow up of PR #<!-- -->165558. (1/n)
This patch updates the below mbarrier Ops to use
AnyTypeOf[] construct:
* mbarrier.arrive
* mbarrier.arrive.noComplete
* mbarrier.test.wait
* cp.async.mbarrier.arrive
* Updated existing tests accordingly.
* Verified locally that there are no new regressions in the `integration` tests.
* TODO: Two more Ops are remaining and will be migrated in a subsequent PR.
---
Patch is 26.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165993.diff
9 Files Affected:
- (modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+1-2)
- (modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+1-1)
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+56-86)
- (modified) mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp (+5-21)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+72-10)
- (modified) mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir (+5-5)
- (modified) mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir (+4-4)
- (modified) mlir/test/Dialect/LLVMIR/nvvm.mlir (+6-6)
- (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (+2-2)
``````````diff
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 15ea84565dd75..0c28f96552ef6 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3333,8 +3333,7 @@ IntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
assert(args.size() == 1);
mlir::Value barrier = convertPtrToNVVMSpace(
builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
- return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType,
- barrier)
+ return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier)
.getResult();
}
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 09b4302446ee7..9bc135e0f12fd 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -436,7 +436,7 @@ end subroutine
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
-! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
+! CHECK: %{{.*}} = nvvm.mbarrier.arrive %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index ba5e48e4ec9ba..93dfeacf6c347 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -656,8 +656,8 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
}
def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr)> {
+ Results<(outs I64:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
let summary = "MBarrier Arrive Operation";
let description = [{
The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
@@ -674,36 +674,32 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
value are implementation-specific.
The operation takes the following operand:
- - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
- addressing, but the address must still be in the shared memory space.
+ - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
+ must be a pointer to generic or shared::cta memory. When it is generic, the
+ underlying address must be within the shared::cta memory space; otherwise
+ the behavior is undefined.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr});
- }];
let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)";
-}
-def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr)> {
- let summary = "Shared MBarrier Arrive Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr});
+ auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)";
}
def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> {
+ Results<(outs I64:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ I32:$count)> {
let summary = "MBarrier Arrive No-Complete Operation";
let description = [{
The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation
@@ -721,33 +717,29 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
captures the phase of the *mbarrier object* prior to the arrive-on operation.
The operation takes the following operands:
- - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
- addressing, but the address must still be in the shared memory space.
+ - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
+ must be a pointer to generic or shared::cta memory. When it is generic, the
+ underlying address must be within the shared::cta memory space; otherwise
+ the behavior is undefined.
- `count`: Integer specifying the count argument to the arrive-on operation.
Must be in the valid range as specified in the *mbarrier object* contents.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count});
- }];
- let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
-}
-def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> {
- let summary = "Shared MBarrier Arrive No-Complete Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
+ let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count});
+ auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
}
def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
@@ -896,8 +888,9 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p
}
def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> {
+ Results<(outs I1:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ I64:$state)> {
let summary = "MBarrier Non-Blocking Test Wait Operation";
let description = [{
The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the
@@ -944,26 +937,20 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state});
- }];
- let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
-}
-def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> {
- let summary = "Shared MBarrier Non-Blocking Test Wait Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
+ let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state});
+ auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
}
//===----------------------------------------------------------------------===//
@@ -1534,47 +1521,30 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> {
The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
all prior cp.async operations initiated by the executing thread.
The `addr` operand specifies the address of the *mbarrier object*
- in generic address space. The `noinc` attr impacts how the
- mbarrier's state is updated.
+ in generic or shared::cta address space. When it is generic, the
+ underlying memory should fall within the shared::cta space;
+ otherwise the behavior is undefined. The `noinc` attr impacts
+ how the mbarrier's state is updated.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
}];
- let assemblyFormat = "$addr attr-dict `:` type(operands)";
let arguments = (ins
- LLVM_AnyPointer:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
+ AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ DefaultValuedAttr<I1Attr, "0">:$noinc);
- string llvmBuilder = [{
- auto intId = $noinc ?
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc :
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
-
- createIntrinsicCall(builder, intId, {$addr});
- }];
-}
-
-def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> {
- let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared";
- let description = [{
- The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object*
- track all prior cp.async operations initiated by the executing thread.
- The `addr` operand specifies the address of the *mbarrier object* in
- shared memory. The `noinc` attr impacts how the mbarrier's state
- is updated.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
- }];
let assemblyFormat = "$addr attr-dict `:` type(operands)";
- let arguments = (ins
- LLVM_PointerShared:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
+ }];
string llvmBuilder = [{
- auto intId = $noinc ?
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared :
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared;
-
- createIntrinsicCall(builder, intId, {$addr});
+ auto [id, args] = NVVM::CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ createIntrinsicCall(builder, id, args);
}];
}
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index ec182f1db48ac..9348d3c172a07 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -865,13 +865,7 @@ struct NVGPUMBarrierArriveLowering
adaptor.getMbarId(), rewriter);
Type tokenType = getTypeConverter()->convertType(
nvgpu::MBarrierTokenType::get(op->getContext()));
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveSharedOp>(op, tokenType,
- barrier);
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType,
- barrier);
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType, barrier);
return success();
}
};
@@ -892,13 +886,8 @@ struct NVGPUMBarrierArriveNoCompleteLowering
Type tokenType = getTypeConverter()->convertType(
nvgpu::MBarrierTokenType::get(op->getContext()));
Value count = truncToI32(b, adaptor.getCount());
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteSharedOp>(
- op, tokenType, barrier, count);
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
- op, tokenType, barrier, count);
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
+ op, tokenType, barrier, count);
return success();
}
};
@@ -915,13 +904,8 @@ struct NVGPUMBarrierTestWaitLowering
getMbarrierPtr(b, op.getBarriers().getType(), adaptor.getBarriers(),
adaptor.getMbarId(), rewriter);
Type retType = rewriter.getI1Type();
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitSharedOp>(
- op, retType, barrier, adaptor.getToken());
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(
- op, retType, barrier, adaptor.getToken());
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(op, retType, barrier,
+ adaptor.getToken());
return success();
}
};
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index a5ffb9e77fa9d..78b320906b638 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1637,15 +1637,21 @@ std::string NVVM::MBarrierInitOp::getPtx() {
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//
+static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) {
+ auto ptrTy = llvm::cast<LLVM::LLVMPointerType>(ptr.getType());
+ return ptrTy.getAddressSpace() == static_cast<unsigned>(targetAS);
+}
+
+static bool isPtrInSharedCTASpace(mlir::Value ptr) {
+ return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
+}
+
mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierInitOp>(op);
- unsigned addressSpace =
- llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
- .getAddressSpace();
- llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
- ? llvm::Intrinsic::nvvm_mbarrier_init_shared
- : llvm::Intrinsic::nvvm_mbarrier_init;
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_init_shared
+ : llvm::Intrinsic::nvvm_mbarrier_init;
// Fill the Intrinsic Args
llvm::SmallVector<llvm::Value *> args;
@@ -1658,16 +1664,72 @@ mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierInvalOp>(op);
- unsigned addressSpace =
- llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
- .getAddressSpace();
- llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
? llvm::Intrinsic::nvvm_mbarrier_inval_shared
: llvm::Intrinsic::nvvm_mbarrier_inval;
return {id, {mt.lookupValue(thisOp.getAddr())}};
}
+mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
+ ? llvm::Intrinsic::nvvm_mbarrier_arrive_shared
+ : llvm::Intrinsic::nvvm_mbarrier_arrive;
+
+ return {id, {mt.lookupValue(thisOp.getAddr())}};
+}
+
+mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveNocompleteOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id =
+ isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared
+ : llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete;
+ // Fill the Intrinsic Args
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(thisOp.getAddr()));
+ args.push_back(mt.lookupValue(thisOp.getCount()));
+
+ return {id, std::move(args)};
+}
+
+mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierTestWaitOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
+ ? llvm::Intrinsic::nvvm_mbarrier_test_wait_shared
+ : llvm::Intrinsic::nvvm_mbarrier_test_wait;
+ // Fill the Intrinsic Args
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(thisOp.getAddr()));
+ args.push_back(mt.lookupValue(thisOp.getState()));
+
+ return {id, std::move(args)};
+}
+
+mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::CpAsyncMBarrierArriveOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+
+ llvm::Intrinsic::ID id;
+ if (thisOp.getNoinc()) {
+ id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared
+ : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc;
+ } else {
+ id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared
+ : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
+ }
+
+ return {id, {mt.lookupValue(thisOp.getAddr())}};
+}
+
#define CP_ASYNC_ID_IMPL(mod, size, suffix) \
llvm::Intrinsic...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165993
More information about the flang-commits
mailing list