[llvm] [NVPTX] Add support for local volatile memory operations (PR #150099)
Akshay Deodhar via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 12:52:14 PDT 2025
https://github.com/akshayrdeodhar created https://github.com/llvm/llvm-project/pull/150099
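Add support for non-atomic volatile loads and stores in the local address space in the NVPTX backend: they are now emitted as `ld.volatile.local` / `st.volatile.local` instead of plain `ld.local` / `st.local`.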
From 10450dc808fabe9f365f99dc13cfc45697451849 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Mon, 21 Jul 2025 21:52:39 +0000
Subject: [PATCH] [NVPTX] Add support for local volatile memory operations
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 17 +++--
llvm/test/CodeGen/NVPTX/forward-ld-param.ll | 2 +-
llvm/test/CodeGen/NVPTX/load-store-scalars.ll | 24 +++----
llvm/test/CodeGen/NVPTX/load-store-sm-90.ll | 1 -
.../CodeGen/NVPTX/load-store-vectors-256.ll | 64 +++++++++----------
llvm/test/CodeGen/NVPTX/load-store-vectors.ll | 52 +++++++--------
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 4 +-
7 files changed, 83 insertions(+), 81 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 65e7c56774547..84762eb1bf71f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -645,15 +645,17 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
// Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
// behavior due to lack of Independent Forward Progress. Lowering these
// to weak memory operations in sm_60- is therefore fine.
- //
// TODO: lower atomic and volatile operations to memory locations
// in local, const, and param to two PTX instructions in sm_70+:
// - the "weak" memory instruction we are currently lowering to, and
// - some other instruction that preserves the side-effect, e.g.,
// a dead dummy volatile load.
- if (CodeAddrSpace == NVPTX::AddressSpace::Local ||
- CodeAddrSpace == NVPTX::AddressSpace::Const ||
- CodeAddrSpace == NVPTX::AddressSpace::Param) {
+
+ if (CodeAddrSpace == NVPTX::AddressSpace::Const ||
+ CodeAddrSpace == NVPTX::AddressSpace::Param ||
+ (CodeAddrSpace == NVPTX::AddressSpace::Local
+ && (!N->isVolatile() || Ordering != AtomicOrdering::NotAtomic))) {
+ // Allow non-atomic local volatile operations
return NVPTX::Ordering::NotAtomic;
}
@@ -677,12 +679,13 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
// from .generic, .global, or .shared. The behavior of PTX volatile and PTX
// atomics is undefined if the generic address does not refer to a .global or
// .shared memory location.
- bool AddrGenericOrGlobalOrShared =
+ bool AddrGenericOrGlobalOrSharedorLocal =
(CodeAddrSpace == NVPTX::AddressSpace::Generic ||
CodeAddrSpace == NVPTX::AddressSpace::Global ||
CodeAddrSpace == NVPTX::AddressSpace::Shared ||
- CodeAddrSpace == NVPTX::AddressSpace::SharedCluster);
- if (!AddrGenericOrGlobalOrShared)
+ CodeAddrSpace == NVPTX::AddressSpace::SharedCluster ||
+ CodeAddrSpace == NVPTX::AddressSpace::Local);
+ if (!AddrGenericOrGlobalOrSharedorLocal)
return NVPTX::Ordering::NotAtomic;
bool UseRelaxedMMIO =
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index ed8f6b4511079..f53fc3a27de15 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -85,7 +85,7 @@ define i32 @test_modify_param(ptr byval([10 x i32]) %a, i32 %b, i32 %c ) {
; CHECK-NEXT: mov.b64 %rd1, test_modify_param_param_0;
; CHECK-NEXT: ld.param.b32 %r1, [test_modify_param_param_1];
; CHECK-NEXT: ld.param.b32 %r2, [test_modify_param_param_2];
-; CHECK-NEXT: st.local.b32 [%rd1+2], %r1;
+; CHECK-NEXT: st.volatile.local.b32 [%rd1+2], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%p2 = getelementptr i8, ptr %a, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
index bac59be5158ea..58ca8d613b09b 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
@@ -2643,9 +2643,9 @@ define void @local_volatile_i8(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i8_param_0];
-; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
-; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
+; CHECK-NEXT: st.volatile.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
@@ -2661,9 +2661,9 @@ define void @local_volatile_i16(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i16_param_0];
-; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
-; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
+; CHECK-NEXT: st.volatile.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(5) %a
%a.add = add i16 %a.load, 1
@@ -2679,9 +2679,9 @@ define void @local_volatile_i32(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i32_param_0];
-; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
-; CHECK-NEXT: st.local.b32 [%rd1], %r2;
+; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(5) %a
%a.add = add i32 %a.load, 1
@@ -2696,9 +2696,9 @@ define void @local_volatile_i64(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i64_param_0];
-; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
-; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
+; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(5) %a
%a.add = add i64 %a.load, 1
@@ -2714,9 +2714,9 @@ define void @local_volatile_float(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_float_param_0];
-; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
-; CHECK-NEXT: st.local.b32 [%rd1], %r2;
+; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(5) %a
%a.add = fadd float %a.load, 1.
@@ -2731,9 +2731,9 @@ define void @local_volatile_double(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_double_param_0];
-; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
-; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
+; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(5) %a
%a.add = fadd double %a.load, 1.
diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
index ed170e92917f5..91a70e4468154 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
@@ -1550,7 +1550,6 @@ define void @shared_seq_cst_volatile_cluster(ptr addrspace(3) %a, ptr addrspace(
}
;; local statespace
-
; CHECK-LABEL: local_unordered_cluster
define void @local_unordered_cluster(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
; CHECK-LABEL: local_unordered_cluster(
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
index 68c53cde7f9ac..4423efcca1ff4 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
@@ -1280,11 +1280,11 @@ define void @local_volatile_32xi8(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_32xi8_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_32xi8_param_1];
-; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%a.load = load volatile <32 x i8>, ptr addrspace(5) %a
store volatile <32 x i8> %a.load, ptr addrspace(5) %b
@@ -1299,11 +1299,11 @@ define void @local_volatile_16xi16(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xi16_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xi16_param_1];
-; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%a.load = load volatile <16 x i16>, ptr addrspace(5) %a
store volatile <16 x i16> %a.load, ptr addrspace(5) %b
@@ -1318,11 +1318,11 @@ define void @local_volatile_16xhalf(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xhalf_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xhalf_param_1];
-; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%a.load = load volatile <16 x half>, ptr addrspace(5) %a
store volatile <16 x half> %a.load, ptr addrspace(5) %b
@@ -1337,11 +1337,11 @@ define void @local_volatile_16xbfloat(ptr addrspace(5) %a, ptr addrspace(5) %b)
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xbfloat_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xbfloat_param_1];
-; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%a.load = load volatile <16 x bfloat>, ptr addrspace(5) %a
store volatile <16 x bfloat> %a.load, ptr addrspace(5) %b
@@ -1356,11 +1356,11 @@ define void @local_volatile_8xi32(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi32_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_8xi32_param_1];
-; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%a.load = load volatile <8 x i32>, ptr addrspace(5) %a
store volatile <8 x i32> %a.load, ptr addrspace(5) %b
@@ -1374,11 +1374,11 @@ define void @local_volatile_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi64_param_0];
-; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xi64_param_1];
-; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i64>, ptr addrspace(5) %a
store volatile <4 x i64> %a.load, ptr addrspace(5) %b
@@ -1392,11 +1392,11 @@ define void @local_volatile_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xfloat_param_0];
-; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_8xfloat_param_1];
-; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
; CHECK-NEXT: ret;
%a.load = load volatile <8 x float>, ptr addrspace(5) %a
store volatile <8 x float> %a.load, ptr addrspace(5) %b
@@ -1410,11 +1410,11 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xdouble_param_0];
-; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xdouble_param_1];
-; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x double>, ptr addrspace(5) %a
store volatile <4 x double> %a.load, ptr addrspace(5) %b
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
index 7e013390a39db..37e66894110ca 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
@@ -2846,10 +2846,10 @@ define void @local_volatile_2xi8(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi8_param_0];
-; CHECK-NEXT: ld.local.v2.b8 {%rs1, %rs2}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
-; CHECK-NEXT: st.local.v2.b8 [%rd1], {%rs4, %rs3};
+; CHECK-NEXT: st.volatile.local.v2.b8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i8>, ptr addrspace(5) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
@@ -2866,7 +2866,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi8_param_0];
-; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1];
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
@@ -2886,7 +2886,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
-; CHECK-NEXT: st.local.b32 [%rd1], %r12;
+; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(5) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
@@ -2903,7 +2903,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi8_param_0];
-; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: prmt.b32 %r3, %r2, 0, 0x7773U;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
@@ -2942,7 +2942,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u32.u16 %r22, %rs16;
; CHECK-NEXT: prmt.b32 %r23, %r22, %r20, 0x3340U;
; CHECK-NEXT: prmt.b32 %r24, %r23, %r18, 0x5410U;
-; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r24, %r13};
+; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r24, %r13};
; CHECK-NEXT: ret;
%a.load = load volatile <8 x i8>, ptr addrspace(5) %a
%a.add = add <8 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -2959,7 +2959,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xi8_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: prmt.b32 %r5, %r4, 0, 0x7773U;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r5;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
@@ -3036,7 +3036,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u32.u16 %r46, %rs32;
; CHECK-NEXT: prmt.b32 %r47, %r46, %r44, 0x3340U;
; CHECK-NEXT: prmt.b32 %r48, %r47, %r42, 0x5410U;
-; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r48, %r37, %r26, %r15};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r48, %r37, %r26, %r15};
; CHECK-NEXT: ret;
%a.load = load volatile <16 x i8>, ptr addrspace(5) %a
%a.add = add <16 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -3052,10 +3052,10 @@ define void @local_volatile_2xi16(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi16_param_0];
-; CHECK-NEXT: ld.local.v2.b16 {%rs1, %rs2}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b16 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
-; CHECK-NEXT: st.local.v2.b16 [%rd1], {%rs4, %rs3};
+; CHECK-NEXT: st.volatile.local.v2.b16 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i16>, ptr addrspace(5) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
@@ -3071,12 +3071,12 @@ define void @local_volatile_4xi16(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi16_param_0];
-; CHECK-NEXT: ld.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
-; CHECK-NEXT: st.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
+; CHECK-NEXT: st.volatile.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i16>, ptr addrspace(5) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
@@ -3093,7 +3093,7 @@ define void @local_volatile_8xi16(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi16_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r4;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
@@ -3110,7 +3110,7 @@ define void @local_volatile_8xi16(ptr addrspace(5) %a) {
; CHECK-NEXT: add.s16 %rs15, %rs14, 1;
; CHECK-NEXT: add.s16 %rs16, %rs13, 1;
; CHECK-NEXT: mov.b32 %r8, {%rs16, %rs15};
-; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <8 x i16>, ptr addrspace(5) %a
%a.add = add <8 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -3126,10 +3126,10 @@ define void @local_volatile_2xi32(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi32_param_0];
-; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
-; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r4, %r3};
+; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i32>, ptr addrspace(5) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
@@ -3145,12 +3145,12 @@ define void @local_volatile_4xi32(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi32_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
-; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i32>, ptr addrspace(5) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
@@ -3165,10 +3165,10 @@ define void @local_volatile_2xi64(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi64_param_0];
-; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
-; CHECK-NEXT: st.local.v2.b64 [%rd1], {%rd5, %rd4};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i64>, ptr addrspace(5) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
@@ -3184,10 +3184,10 @@ define void @local_volatile_2xfloat(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xfloat_param_0];
-; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %r3, %r2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %r4, %r1, 0f3F800000;
-; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r4, %r3};
+; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x float>, ptr addrspace(5) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
@@ -3203,12 +3203,12 @@ define void @local_volatile_4xfloat(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xfloat_param_0];
-; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %r5, %r4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %r6, %r3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %r7, %r2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %r8, %r1, 0f3F800000;
-; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x float>, ptr addrspace(5) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
@@ -3223,10 +3223,10 @@ define void @local_volatile_2xdouble(ptr addrspace(5) %a) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xdouble_param_0];
-; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd4, %rd3, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %rd5, %rd2, 0d3FF0000000000000;
-; CHECK-NEXT: st.local.v2.b64 [%rd1], {%rd5, %rd4};
+; CHECK-NEXT: st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x double>, ptr addrspace(5) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 5c3017310d0a3..5b8018d8b32a7 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -18,7 +18,7 @@ define void @foo(i32 %a) {
; PTX32-NEXT: mov.b32 %SPL, __local_depot0;
; PTX32-NEXT: ld.param.b32 %r1, [foo_param_0];
; PTX32-NEXT: add.u32 %r3, %SPL, 0;
-; PTX32-NEXT: st.local.b32 [%r3], %r1;
+; PTX32-NEXT: st.volatile.local.b32 [%r3], %r1;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: foo(
@@ -33,7 +33,7 @@ define void @foo(i32 %a) {
; PTX64-NEXT: mov.b64 %SPL, __local_depot0;
; PTX64-NEXT: ld.param.b32 %r1, [foo_param_0];
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX64-NEXT: st.local.b32 [%rd2], %r1;
+; PTX64-NEXT: st.volatile.local.b32 [%rd2], %r1;
; PTX64-NEXT: ret;
%local = alloca i32, align 4
store volatile i32 %a, ptr %local
More information about the llvm-commits
mailing list