[flang-commits] [flang] 35ee3c6 - [MLIR][NVVM] Update mbarrier Ops to use AnyTypeOf[] (2/n) (#165993)
via flang-commits
flang-commits at lists.llvm.org
Wed Nov 5 02:08:29 PST 2025
Author: Durgadoss R
Date: 2025-11-05T15:38:24+05:30
New Revision: 35ee3c6f72ba5aa26299d693f866385f23e4d330
URL: https://github.com/llvm/llvm-project/commit/35ee3c6f72ba5aa26299d693f866385f23e4d330
DIFF: https://github.com/llvm/llvm-project/commit/35ee3c6f72ba5aa26299d693f866385f23e4d330.diff
LOG: [MLIR][NVVM] Update mbarrier Ops to use AnyTypeOf[] (2/n) (#165993)
This is a follow-up to PR #165558 (1/n).
This patch updates the mbarrier Ops below to use the
AnyTypeOf[] construct (see the usage sketch after the notes below):
```
* mbarrier.arrive
* mbarrier.arrive.nocomplete
* mbarrier.test.wait
* cp.async.mbarrier.arrive
```
* Updated existing tests accordingly.
* Verified locally that there are no new regressions in the `integration` tests.
* TODO: Two more Ops remain and will be migrated in a subsequent PR.
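For illustration, a minimal sketch (not part of the patch; the function name is hypothetical, and the op syntax follows the updated `nvvm.mlir` tests in this diff) of how a single unified op now accepts either a generic or a shared::cta pointer, with the matching intrinsic chosen at LLVM IR translation time from the pointer's address space:
```
// Hypothetical example; op syntax mirrors the updated tests below.
llvm.func @mbarrier_arrive_example(%bar_gen: !llvm.ptr, %bar_shared: !llvm.ptr<3>) {
  // Generic pointer: translated to the llvm.nvvm.mbarrier.arrive intrinsic.
  %0 = nvvm.mbarrier.arrive %bar_gen : !llvm.ptr -> i64
  // Shared::cta pointer: the same op translates to llvm.nvvm.mbarrier.arrive.shared.
  %1 = nvvm.mbarrier.arrive %bar_shared : !llvm.ptr<3> -> i64
  llvm.return
}
```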
Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
Added:
Modified:
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
flang/test/Lower/CUDA/cuda-device-proc.cuf
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
mlir/test/Dialect/LLVMIR/nvvm.mlir
mlir/test/Target/LLVMIR/nvvmir.mlir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 4e276a72897fe..6312e61f5e62a 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -891,8 +891,7 @@ CUDAIntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
assert(args.size() == 1);
mlir::Value barrier = convertPtrToNVVMSpace(
builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
- return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType,
- barrier)
+ return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier)
.getResult();
}
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 038aa0a058277..2d2c801b48f4d 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -440,7 +440,7 @@ end subroutine
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
-! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
+! CHECK: %{{.*}} = nvvm.mbarrier.arrive %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 5f87e5c07e56e..10f0cc254ea97 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -658,8 +658,8 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
}
def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr)> {
+ Results<(outs I64:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
let summary = "MBarrier Arrive Operation";
let description = [{
The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
@@ -676,36 +676,32 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
value are implementation-specific.
The operation takes the following operand:
- - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
- addressing, but the address must still be in the shared memory space.
+ - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
+ must be a pointer to generic or shared::cta memory. When it is generic, the
+ underlying address must be within the shared::cta memory space; otherwise
+ the behavior is undefined.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr});
- }];
let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)";
-}
-def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr)> {
- let summary = "Shared MBarrier Arrive Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr});
+ auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)";
}
def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> {
+ Results<(outs I64:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ I32:$count)> {
let summary = "MBarrier Arrive No-Complete Operation";
let description = [{
The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation
@@ -723,33 +719,29 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
captures the phase of the *mbarrier object* prior to the arrive-on operation.
The operation takes the following operands:
- - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
- addressing, but the address must still be in the shared memory space.
+ - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
+ must be a pointer to generic or shared::cta memory. When it is generic, the
+ underlying address must be within the shared::cta memory space; otherwise
+ the behavior is undefined.
- `count`: Integer specifying the count argument to the arrive-on operation.
Must be in the valid range as specified in the *mbarrier object* contents.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count});
- }];
- let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
-}
-def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> {
- let summary = "Shared MBarrier Arrive No-Complete Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
+ let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count});
+ auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
}
def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
@@ -898,8 +890,9 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p
}
def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> {
+ Results<(outs I1:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ I64:$state)> {
let summary = "MBarrier Non-Blocking Test Wait Operation";
let description = [{
The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the
@@ -946,26 +939,20 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
}];
- string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state});
- }];
- let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
-}
-def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">,
- Results<(outs LLVM_Type:$res)>,
- Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> {
- let summary = "Shared MBarrier Non-Blocking Test Wait Operation";
- let description = [{
- This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object*
- should be accessed using a shared-memory pointer instead of a generic-memory pointer.
+ let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
}];
+
string llvmBuilder = [{
- $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state});
+ auto [id, args] = NVVM::MBarrierTestWaitOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
}];
- let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
}
//===----------------------------------------------------------------------===//
@@ -1541,47 +1528,30 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> {
The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
all prior cp.async operations initiated by the executing thread.
The `addr` operand specifies the address of the *mbarrier object*
- in generic address space. The `noinc` attr impacts how the
- mbarrier's state is updated.
+ in generic or shared::cta address space. When it is generic, the
+ underlying memory should fall within the shared::cta space;
+ otherwise the behavior is undefined. The `noinc` attr impacts
+ how the mbarrier's state is updated.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
}];
- let assemblyFormat = "$addr attr-dict `:` type(operands)";
let arguments = (ins
- LLVM_AnyPointer:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
-
- string llvmBuilder = [{
- auto intId = $noinc ?
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc :
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
-
- createIntrinsicCall(builder, intId, {$addr});
- }];
-}
+ AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ DefaultValuedAttr<I1Attr, "0">:$noinc);
-def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> {
- let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared";
- let description = [{
- The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object*
- track all prior cp.async operations initiated by the executing thread.
- The `addr` operand specifies the address of the *mbarrier object* in
- shared memory. The `noinc` attr impacts how the mbarrier's state
- is updated.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
- }];
let assemblyFormat = "$addr attr-dict `:` type(operands)";
- let arguments = (ins
- LLVM_PointerShared:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
+ }];
string llvmBuilder = [{
- auto intId = $noinc ?
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared :
- llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared;
-
- createIntrinsicCall(builder, intId, {$addr});
+ auto [id, args] = NVVM::CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ createIntrinsicCall(builder, id, args);
}];
}
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index ec182f1db48ac..9348d3c172a07 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -865,13 +865,7 @@ struct NVGPUMBarrierArriveLowering
adaptor.getMbarId(), rewriter);
Type tokenType = getTypeConverter()->convertType(
nvgpu::MBarrierTokenType::get(op->getContext()));
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveSharedOp>(op, tokenType,
- barrier);
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType,
- barrier);
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType, barrier);
return success();
}
};
@@ -892,13 +886,8 @@ struct NVGPUMBarrierArriveNoCompleteLowering
Type tokenType = getTypeConverter()->convertType(
nvgpu::MBarrierTokenType::get(op->getContext()));
Value count = truncToI32(b, adaptor.getCount());
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteSharedOp>(
- op, tokenType, barrier, count);
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
- op, tokenType, barrier, count);
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
+ op, tokenType, barrier, count);
return success();
}
};
@@ -915,13 +904,8 @@ struct NVGPUMBarrierTestWaitLowering
getMbarrierPtr(b, op.getBarriers().getType(), adaptor.getBarriers(),
adaptor.getMbarId(), rewriter);
Type retType = rewriter.getI1Type();
- if (isMbarrierShared(op.getBarriers().getType())) {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitSharedOp>(
- op, retType, barrier, adaptor.getToken());
- } else {
- rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(
- op, retType, barrier, adaptor.getToken());
- }
+ rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(op, retType, barrier,
+ adaptor.getToken());
return success();
}
};
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 262d9b753a2d7..d43f8815be16d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1752,15 +1752,21 @@ std::string NVVM::MBarrierInitOp::getPtx() {
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//
+static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) {
+ auto ptrTy = llvm::cast<LLVM::LLVMPointerType>(ptr.getType());
+ return ptrTy.getAddressSpace() == static_cast<unsigned>(targetAS);
+}
+
+static bool isPtrInSharedCTASpace(mlir::Value ptr) {
+ return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
+}
+
mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierInitOp>(op);
- unsigned addressSpace =
- llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
- .getAddressSpace();
- llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
- ? llvm::Intrinsic::nvvm_mbarrier_init_shared
- : llvm::Intrinsic::nvvm_mbarrier_init;
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_init_shared
+ : llvm::Intrinsic::nvvm_mbarrier_init;
// Fill the Intrinsic Args
llvm::SmallVector<llvm::Value *> args;
@@ -1773,16 +1779,72 @@ mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierInvalOp>(op);
- unsigned addressSpace =
- llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
- .getAddressSpace();
- llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
? llvm::Intrinsic::nvvm_mbarrier_inval_shared
: llvm::Intrinsic::nvvm_mbarrier_inval;
return {id, {mt.lookupValue(thisOp.getAddr())}};
}
+mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
+ ? llvm::Intrinsic::nvvm_mbarrier_arrive_shared
+ : llvm::Intrinsic::nvvm_mbarrier_arrive;
+
+ return {id, {mt.lookupValue(thisOp.getAddr())}};
+}
+
+mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveNocompleteOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id =
+ isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared
+ : llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete;
+ // Fill the Intrinsic Args
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(thisOp.getAddr()));
+ args.push_back(mt.lookupValue(thisOp.getCount()));
+
+ return {id, std::move(args)};
+}
+
+mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierTestWaitOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id = isShared
+ ? llvm::Intrinsic::nvvm_mbarrier_test_wait_shared
+ : llvm::Intrinsic::nvvm_mbarrier_test_wait;
+ // Fill the Intrinsic Args
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(thisOp.getAddr()));
+ args.push_back(mt.lookupValue(thisOp.getState()));
+
+ return {id, std::move(args)};
+}
+
+mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::CpAsyncMBarrierArriveOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+
+ llvm::Intrinsic::ID id;
+ if (thisOp.getNoinc()) {
+ id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared
+ : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc;
+ } else {
+ id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared
+ : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
+ }
+
+ return {id, {mt.lookupValue(thisOp.getAddr())}};
+}
+
#define CP_ASYNC_ID_IMPL(mod, size, suffix) \
llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 8cce6308018e2..dcf4ddb2dd48c 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -491,12 +491,12 @@ func.func @mbarrier() {
// CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr2:.+]] = llvm.getelementptr %[[base2]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
- // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.shared %[[barPtr2]]
+ // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive %[[barPtr2]]
%token = nvgpu.mbarrier.arrive %barrier[%c0] : !barrierType -> !tokenType
// CHECK: %[[base3:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr3:.+]] = llvm.getelementptr %[[base3]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
- // CHECK: nvvm.mbarrier.test.wait.shared %[[barPtr3]], %[[token]]
+ // CHECK: nvvm.mbarrier.test.wait %[[barPtr3]], %[[token]]
%isDone = nvgpu.mbarrier.test.wait %barrier[%c0], %token : !barrierType, !tokenType
func.return
@@ -521,12 +521,12 @@ func.func @mbarrier_nocomplete() {
// CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr2:.+]] = llvm.getelementptr %[[base2]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
- // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.nocomplete.shared %[[barPtr2]]
+ // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.nocomplete %[[barPtr2]]
%token = nvgpu.mbarrier.arrive.nocomplete %barrier[%c0], %count : !barrierType -> !tokenType
// CHECK: %[[base3:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr3:.+]] = llvm.getelementptr %[[base3]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
- // CHECK: nvvm.mbarrier.test.wait.shared %[[barPtr3]], %[[token]]
+ // CHECK: nvvm.mbarrier.test.wait %[[barPtr3]], %[[token]]
%isDone = nvgpu.mbarrier.test.wait %barrier[%c0], %token : !barrierType, !tokenType
func.return
@@ -572,7 +572,7 @@ func.func @mbarrier_wait(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.ad
// CHECK: %[[S3:.+]] = builtin.unrealized_conversion_cast %[[S2]] : index to i64
// CHECK: %[[S4:.+]] = llvm.extractvalue %[[CARG0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[S5:.+]] = llvm.getelementptr %[[S4]][%[[S3]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
-// CHECK: nvvm.mbarrier.test.wait.shared {{.*}}, %[[CARG1]]
+// CHECK: nvvm.mbarrier.test.wait {{.*}}, %[[CARG1]]
%mbarId = arith.remui %i, %numBarriers : index
%isDone = nvgpu.mbarrier.test.wait %barriers[%mbarId], %token : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>, !tokenType
}
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index fbc4c0af60360..a9356c5cb60bb 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -88,10 +88,10 @@ func.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.p
nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr
// CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true}
nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr
- // CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}}
- nvvm.cp.async.mbarrier.arrive.shared %bar_shared : !llvm.ptr<3>
- // CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}} {noinc = true}
- nvvm.cp.async.mbarrier.arrive.shared %bar_shared {noinc = true} : !llvm.ptr<3>
+ // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}}
+ nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3>
+ // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true}
+ nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3>
llvm.return
}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 2505e56407c2b..cd7bd37da5763 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -445,8 +445,8 @@ llvm.func private @mbarrier_arrive(%barrier: !llvm.ptr) {
}
llvm.func private @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) {
- // CHECK: nvvm.mbarrier.arrive.shared %{{.*}} : !llvm.ptr<3>
- %0 = nvvm.mbarrier.arrive.shared %barrier : !llvm.ptr<3> -> i64
+ // CHECK: nvvm.mbarrier.arrive %{{.*}} : !llvm.ptr<3>
+ %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64
llvm.return
}
@@ -459,8 +459,8 @@ llvm.func private @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) {
llvm.func private @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) {
%count = nvvm.read.ptx.sreg.ntid.x : i32
- // CHECK: nvvm.mbarrier.arrive.nocomplete.shared %{{.*}} : !llvm.ptr<3>
- %0 = nvvm.mbarrier.arrive.nocomplete.shared %barrier, %count : !llvm.ptr<3>, i32 -> i64
+ // CHECK: nvvm.mbarrier.arrive.nocomplete %{{.*}} : !llvm.ptr<3>
+ %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32 -> i64
llvm.return
}
@@ -472,8 +472,8 @@ llvm.func private @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 {
llvm.func private @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) {
%count = nvvm.read.ptx.sreg.ntid.x : i32
- // CHECK: nvvm.mbarrier.test.wait.shared %{{.*}}
- %isComplete = nvvm.mbarrier.test.wait.shared %barrier, %token : !llvm.ptr<3>, i64 -> i1
+ // CHECK: nvvm.mbarrier.test.wait %{{.*}}
+ %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1
llvm.return
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 9115de65ff0e8..3fc09f371a347 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -538,9 +538,9 @@ llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.p
// CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}})
nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr
// CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}})
- nvvm.cp.async.mbarrier.arrive.shared %bar_shared : !llvm.ptr<3>
+ nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3>
// CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}})
- nvvm.cp.async.mbarrier.arrive.shared %bar_shared {noinc = true} : !llvm.ptr<3>
+ nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3>
llvm.return
}