[Mlir-commits] [mlir] [mlir][NVVM] Add support for few more fence Ops (PR #170251)
Pradeep Kumar
llvmlistbot at llvm.org
Mon Dec 1 23:09:39 PST 2025
https://github.com/schwarzschild-radius created https://github.com/llvm/llvm-project/pull/170251
This commit adds support for the following fence Ops:
- fence.acquire.sync_restrict
- fence.release.sync_restrict
- fence.proxy.acquire.sync_restrict
- fence.proxy.release.sync_restrict
and removes fence.sc.cluster. The commit also moves memory.barrier into the Membar/Fence section
>From 0c128f8aa9f970ef41c06b31993d207c5f968cca Mon Sep 17 00:00:00 2001
From: Pradeep Kumar <pradeepku at nvidia.com>
Date: Mon, 1 Dec 2025 07:14:22 +0000
Subject: [PATCH] [mlir][NVVM] Add support for few more fence Ops
This commit adds support for the following fence Ops:
- fence.acquire.sync_restrict
- fence.release.sync_restrict
- fence.proxy.acquire.sync_restrict
- fence.proxy.release.sync_restrict
and removes fence.sc.cluster. The commit also moves memory.barrier into the Membar/Fence section
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 160 +++++++++++++-----
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 20 ++-
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 19 +++
.../Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 7 -
mlir/test/Dialect/LLVMIR/nvvm.mlir | 7 -
mlir/test/Target/LLVMIR/nvvm/fence.mlir | 78 +++++++++
mlir/test/Target/LLVMIR/nvvmir.mlir | 36 ----
7 files changed, 230 insertions(+), 97 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fence.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index b3395b7e0a24e..95bf5709030e2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1319,11 +1319,70 @@ def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> {
let assemblyFormat = "attr-dict";
}
-def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> {
- string llvmBuilder = [{
- createIntrinsicCall(builder, llvm::Intrinsic::nvvm_fence_sc_cluster);
+//===----------------------------------------------------------------------===//
+// NVVM Member/Fence
+//===----------------------------------------------------------------------===//
+
+def NVVM_MembarOp : NVVM_Op<"memory.barrier">,
+ Arguments<(ins MemScopeKindAttr:$scope)> {
+ let summary = "Memory barrier operation";
+ let description = [{
+ `membar` operation guarantees that prior memory accesses requested by this
+ thread are performed at the specified `scope`, before later memory
+ operations requested by this thread following the membar instruction.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "$scope attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder, getMembarIntrinsicID($scope));
+ }];
+}
+
+def NVVM_FenceAcquireSyncRestrictOp : NVVM_Op<"fence.acquire.sync_restrict"> {
+ let summary = "Uni-directional thread fence operation with acquire semantics";
+ let description = [{
+ The `nvvm.fence.acquire.sync_restrict` Op restricts the class of memory
+ operations for which the fence instruction provides the memory ordering guarantees.
+ `sync_restrict` restricts `acquire` memory semantics to `shared_cluster` with cluster scope.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
}];
+
let assemblyFormat = "attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::nvvm_fence_acquire_sync_restrict_space_cluster_scope_cluster);
+ }];
+}
+
+def NVVM_FenceReleaseSyncRestrictOp : NVVM_Op<"fence.release.sync_restrict"> {
+ let summary = "Uni-directional thread fence operation with release semantics";
+ let description = [{
+ The `nvvm.fence.release.sync_restrict` Op restricts the class of memory
+ operations for which the fence instruction provides the memory ordering guarantees.
+ `sync_restrict` restricts `release` memory semantics to `shared_cta` with cluster scope.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::nvvm_fence_release_sync_restrict_space_cta_scope_cluster);
+ }];
+}
+
+def NVVM_FenceMbarrierInitOp : NVVM_Op<"fence.mbarrier.init"> {
+ let description = [{
+ Fence operation that applies on the prior nvvm.mbarrier.init
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_fence_mbarrier_init_release_cluster);
+ }];
}
def ProxyAlias : I32EnumAttrCase<"alias", 0, "alias">;
@@ -1339,10 +1398,15 @@ def ProxyKind : I32EnumAttr<"ProxyKind", "Proxy kind",
}
def ProxyKindAttr : EnumAttr<NVVM_Dialect, ProxyKind, "proxy_kind"> {
+ let description = [{
+ ProxyKind attribute represents a memory proxy which is an abstract label
+ applied to a method of memory access. When two memory operations use distinct
+ methods of memory access, they are said to be different proxies.
+ }];
let assemblyFormat = "`<` $value `>`";
}
-def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">,
+def NVVM_FenceProxyOp : NVVM_Op<"fence.proxy">,
Arguments<(ins ProxyKindAttr:$kind,
OptionalAttr<SharedSpaceAttr>:$space)> {
let description = [{
@@ -1353,16 +1417,11 @@ def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">,
}];
let assemblyFormat = "attr-dict";
- let extraClassDefinition = [{
- std::string $cppClass::getPtx() {
- std::string ptx = "fence.proxy.";
- ptx += stringifyProxyKind(getKind());
- if(getKind() == NVVM::ProxyKind::async_shared)
- { ptx += "::"; ptx += stringifySharedSpace(getSpace().value()); }
- ptx += ";";
- return ptx;
- }
+
+ let llvmBuilder = [{
+ createIntrinsicCall(builder, getFenceProxyID($kind, $space));
}];
+
let hasVerifier = 1;
}
@@ -1399,23 +1458,6 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
let hasVerifier = 1;
}
-def NVVM_MembarOp : NVVM_Op<"memory.barrier">,
- Arguments<(ins MemScopeKindAttr:$scope)> {
- let summary = "Memory barrier operation";
- let description = [{
- `membar` operation guarantees that prior memory accesses requested by this
- thread are performed at the specified `scope`, before later memory
- operations requested by this thread following the membar instruction.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
- }];
-
- let assemblyFormat = "$scope attr-dict";
- let llvmBuilder = [{
- createIntrinsicCall(builder, getMembarIntrinsicID($scope), {});
- }];
-}
-
def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
Arguments<(ins MemScopeKindAttr:$scope,
DefaultValuedAttr<ProxyKindAttr,
@@ -1442,6 +1484,48 @@ def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
let hasVerifier = 1;
}
+def NVVM_FenceProxyAcquireSyncRestrictOp : NVVM_Op<"fence.proxy.acquire.sync_restrict">,
+ Arguments<(ins DefaultValuedAttr<ProxyKindAttr, "ProxyKind::GENERIC">:$fromProxy,
+ DefaultValuedAttr<ProxyKindAttr, "ProxyKind::async">:$toProxy)> {
+ let summary = "Uni-directional thread fence operation with acquire semantics";
+ let description = [{
+ The `nvvm.fence.proxy.acquire.sync_restrict` Op used to establish
+ ordering between a prior memory access performed between proxies. Currently,
+ the ordering is only supported between async and generic proxies. `sync_restrict`
+ restricts `acquire` memory semantics to `shared_cluster` with cluster scope.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::nvvm_fence_proxy_async_generic_acquire_sync_restrict_space_cluster_scope_cluster);
+ }];
+
+ let hasVerifier = 1;
+}
+
+def NVVM_FenceProxyReleaseSyncRestrictOp : NVVM_Op<"fence.proxy.release.sync_restrict">,
+ Arguments<(ins DefaultValuedAttr<ProxyKindAttr, "ProxyKind::GENERIC">:$fromProxy,
+ DefaultValuedAttr<ProxyKindAttr, "ProxyKind::async">:$toProxy)> {
+ let summary = "Uni-directional thread fence operation with release semantics";
+ let description = [{
+ The `nvvm.fence.proxy.release.sync_restrict` Op used to establish
+ ordering between a prior memory access performed between proxies. Currently,
+ the ordering is only supported between async and generic proxies. `sync_restrict`
+ restricts `release` memory semantics to `shared_cta` with cluster scope.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster);
+ }];
+
+ let hasVerifier = 1;
+}
+
def SetMaxRegisterActionIncrease : I32EnumAttrCase<"increase", 0>;
def SetMaxRegisterActionDecrease : I32EnumAttrCase<"decrease", 1>;
def SetMaxRegisterAction : I32EnumAttr<"SetMaxRegisterAction", "NVVM set max register action",
@@ -1464,22 +1548,6 @@ def NVVM_SetMaxRegisterOp : NVVM_Op<"setmaxregister"> {
}];
}
-def NVVM_FenceMbarrierInitOp : NVVM_PTXBuilder_Op<"fence.mbarrier.init"> {
- let arguments = (ins );
- let description = [{
- Fence operation that applies on the prior nvvm.mbarrier.init
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
- }];
-
- let assemblyFormat = "attr-dict";
- let extraClassDefinition = [{
- std::string $cppClass::getPtx() {
- return std::string("fence.mbarrier_init.release.cluster;");
- }
- }];
-}
-
def ShflKindBfly : I32EnumAttrCase<"bfly", 0>;
def ShflKindUp : I32EnumAttrCase<"up", 1>;
def ShflKindDown : I32EnumAttrCase<"down", 2>;
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 413125245aca8..0954a82d12bea 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -2112,7 +2112,6 @@ LogicalResult NVVM::FenceProxyAcquireOp::verify() {
if (getToProxy() != NVVM::ProxyKind::TENSORMAP)
return emitOpError("uni-directional proxies only support tensormap "
"for to_proxy attribute");
-
return success();
}
@@ -2124,7 +2123,26 @@ LogicalResult NVVM::FenceProxyReleaseOp::verify() {
if (getToProxy() != NVVM::ProxyKind::TENSORMAP)
return emitOpError("uni-directional proxies only support tensormap "
"for to_proxy attribute");
+ return success();
+}
+
+LogicalResult NVVM::FenceProxyAcquireSyncRestrictOp::verify() {
+ if (getFromProxy() != NVVM::ProxyKind::GENERIC)
+ return emitOpError("uni-directional proxies only support generic for "
+ "from_proxy attribute");
+
+ if (getToProxy() != NVVM::ProxyKind::async)
+ return emitOpError("only async is supported for to_proxy attribute");
+ return success();
+}
+
+LogicalResult NVVM::FenceProxyReleaseSyncRestrictOp::verify() {
+ if (getFromProxy() != NVVM::ProxyKind::GENERIC)
+ return emitOpError("uni-directional proxies only support generic for "
+ "from_proxy attribute");
+ if (getToProxy() != NVVM::ProxyKind::async)
+ return emitOpError("only async is supported for to_proxy attribute");
return success();
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cecff51e637a5..95d41d05658e6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -411,6 +411,25 @@ getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) {
llvm_unreachable("unhandled tcgen05.st lowering");
}
+static llvm::Intrinsic::ID getFenceProxyID(NVVM::ProxyKind kind,
+ std::optional<NVVM::SharedSpace> space) {
+ switch (kind) {
+ case NVVM::ProxyKind::alias:
+ return llvm::Intrinsic::nvvm_fence_proxy_alias;
+ case NVVM::ProxyKind::async:
+ return llvm::Intrinsic::nvvm_fence_proxy_async;
+ case NVVM::ProxyKind::async_global:
+ return llvm::Intrinsic::nvvm_fence_proxy_async_global;
+ case NVVM::ProxyKind::async_shared:
+ if (*space == NVVM::SharedSpace::shared_cta)
+ return llvm::Intrinsic::nvvm_fence_proxy_async_shared_cta;
+ else
+ return llvm::Intrinsic::nvvm_fence_proxy_async_shared_cluster;
+ default:
+ llvm_unreachable("unsupported proxy kind for fence.proxy Op");
+ }
+}
+
namespace {
/// Implementation of the dialect interface that converts operations belonging
/// to the NVVM dialect to LLVM IR.
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index fbf8d9efb3bc7..442046b9f3302 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -584,13 +584,6 @@ func.func @cp_async_bulk_wait_group() {
// -----
-func.func @fence_mbarrier_init() {
- //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.mbarrier_init.release.cluster;"
- nvvm.fence.mbarrier.init
- func.return
-}
-// -----
-
func.func @fence_proxy() {
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.alias;", "" : () -> ()
nvvm.fence.proxy { kind = #nvvm.proxy_kind<alias>}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index cd7bd37da5763..3bd9d9dd390e5 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -92,13 +92,6 @@ func.func @llvm_nvvm_cluster_wait() {
llvm.return
}
-// CHECK-LABEL: @llvm_nvvm_fence_sc_cluster
-func.func @llvm_nvvm_fence_sc_cluster() {
- // CHECK: nvvm.fence.sc.cluster
- nvvm.fence.sc.cluster
- llvm.return
-}
-
// CHECK-LABEL: @nvvm_shfl
func.func @nvvm_shfl(
%arg0 : i32, %arg1 : i32, %arg2 : i32,
diff --git a/mlir/test/Target/LLVMIR/nvvm/fence.mlir b/mlir/test/Target/LLVMIR/nvvm/fence.mlir
new file mode 100644
index 0000000000000..26ca922e178f2
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fence.mlir
@@ -0,0 +1,78 @@
+// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: @nvvm_fence_sync_restrict
+llvm.func @nvvm_fence_sync_restrict() {
+ // CHECK: call void @llvm.nvvm.fence.acquire.sync_restrict.space.cluster.scope.cluster()
+ nvvm.fence.acquire.sync_restrict
+ // CHECK: call void @llvm.nvvm.fence.release.sync_restrict.space.cta.scope.cluster()
+ nvvm.fence.release.sync_restrict
+ llvm.return
+}
+
+// CHECK-LABEL: @nvvm_fence_proxy
+llvm.func @nvvm_fence_proxy() {
+ // CHECK: call void @llvm.nvvm.fence.proxy.alias()
+ nvvm.fence.proxy {kind = #nvvm.proxy_kind<alias>}
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.async()
+ nvvm.fence.proxy {kind = #nvvm.proxy_kind<async>}
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.async.global()
+ nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.global>}
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cta()
+ nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cluster()
+ nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cluster>}
+ llvm.return
+}
+
+// CHECK-LABEL: @nvvm_fence_proxy_sync_restrict
+llvm.func @nvvm_fence_proxy_sync_restrict() {
+ // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.acquire.sync_restrict.space.cluster.scope.cluster()
+ nvvm.fence.proxy.acquire.sync_restrict
+ // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.release.sync_restrict.space.cta.scope.cluster()
+ nvvm.fence.proxy.release.sync_restrict
+ llvm.return
+}
+
+// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_release
+llvm.func @nvvm_fence_proxy_tensormap_generic_release() {
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta()
+ nvvm.fence.proxy.release #nvvm.mem_scope<cta>
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster()
+ nvvm.fence.proxy.release #nvvm.mem_scope<cluster>
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu()
+ nvvm.fence.proxy.release #nvvm.mem_scope<gpu>
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys()
+ nvvm.fence.proxy.release #nvvm.mem_scope<sys>
+ llvm.return
+}
+
+// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_acquire
+llvm.func @nvvm_fence_proxy_tensormap_generic_acquire(%addr : !llvm.ptr) {
+ %c128 = llvm.mlir.constant(128) : i32
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr {{%[0-9]+}}, i32 128)
+ nvvm.fence.proxy.acquire #nvvm.mem_scope<cta> %addr, %c128
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr {{%[0-9]+}}, i32 128)
+ nvvm.fence.proxy.acquire #nvvm.mem_scope<cluster> %addr, %c128
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr {{%[0-9]+}}, i32 128)
+ nvvm.fence.proxy.acquire #nvvm.mem_scope<gpu> %addr, %c128
+
+ // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr {{%[0-9]+}}, i32 128)
+ nvvm.fence.proxy.acquire #nvvm.mem_scope<sys> %addr, %c128
+ llvm.return
+}
+
+// CHECK-LABEL: @fence_mbarrier_init
+llvm.func @fence_mbarrier_init() {
+ // CHECK: call void @llvm.nvvm.fence.mbarrier_init.release.cluster()
+ nvvm.fence.mbarrier.init
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 5cba5c4fceefd..c4a69097692cb 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -699,42 +699,6 @@ llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant},
llvm.return
}
-
-// -----
-// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_release
-llvm.func @nvvm_fence_proxy_tensormap_generic_release() {
- %c128 = llvm.mlir.constant(128) : i32
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta()
- nvvm.fence.proxy.release #nvvm.mem_scope<cta>
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster()
- nvvm.fence.proxy.release #nvvm.mem_scope<cluster>
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu()
- nvvm.fence.proxy.release #nvvm.mem_scope<gpu>
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys()
- nvvm.fence.proxy.release #nvvm.mem_scope<sys>
- llvm.return
-}
-
-// -----
-// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_acquire
-llvm.func @nvvm_fence_proxy_tensormap_generic_acquire(%addr : !llvm.ptr) {
- %c128 = llvm.mlir.constant(128) : i32
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr {{%[0-9]+}}, i32 128)
- nvvm.fence.proxy.acquire #nvvm.mem_scope<cta> %addr, %c128
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr {{%[0-9]+}}, i32 128)
- nvvm.fence.proxy.acquire #nvvm.mem_scope<cluster> %addr, %c128
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr {{%[0-9]+}}, i32 128)
- nvvm.fence.proxy.acquire #nvvm.mem_scope<gpu> %addr, %c128
-
- // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr {{%[0-9]+}}, i32 128)
- nvvm.fence.proxy.acquire #nvvm.mem_scope<sys> %addr, %c128
- llvm.return
-}
// -----
// CHECK-LABEL: @nvvm_exit
More information about the Mlir-commits
mailing list