[llvm] [NVPTX] Add support for local volatile memory operations (PR #150099)

Akshay Deodhar via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 22 12:52:14 PDT 2025


https://github.com/akshayrdeodhar created https://github.com/llvm/llvm-project/pull/150099

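Add support for volatile loads and stores to the local address space in NVPTX. Non-atomic volatile accesses to local memory are now lowered to `ld.volatile.local` / `st.volatile.local` instead of plain `ld.local` / `st.local`.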

>From 10450dc808fabe9f365f99dc13cfc45697451849 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Mon, 21 Jul 2025 21:52:39 +0000
Subject: [PATCH] [NVPTX] Add support for local volatile memory operations

---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   | 17 +++--
 llvm/test/CodeGen/NVPTX/forward-ld-param.ll   |  2 +-
 llvm/test/CodeGen/NVPTX/load-store-scalars.ll | 24 +++----
 llvm/test/CodeGen/NVPTX/load-store-sm-90.ll   |  1 -
 .../CodeGen/NVPTX/load-store-vectors-256.ll   | 64 +++++++++----------
 llvm/test/CodeGen/NVPTX/load-store-vectors.ll | 52 +++++++--------
 llvm/test/CodeGen/NVPTX/local-stack-frame.ll  |  4 +-
 7 files changed, 83 insertions(+), 81 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 65e7c56774547..84762eb1bf71f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -645,15 +645,17 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   //      Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
   //      behavior due to lack of Independent Forward Progress. Lowering these
   //      to weak memory operations in sm_60- is therefore fine.
-  //
   //      TODO: lower atomic and volatile operations to memory locations
   //      in local, const, and param to two PTX instructions in sm_70+:
   //        - the "weak" memory instruction we are currently lowering to, and
   //        - some other instruction that preserves the side-effect, e.g.,
   //          a dead dummy volatile load.
-  if (CodeAddrSpace == NVPTX::AddressSpace::Local ||
-      CodeAddrSpace == NVPTX::AddressSpace::Const ||
-      CodeAddrSpace == NVPTX::AddressSpace::Param) {
+
+  if (CodeAddrSpace == NVPTX::AddressSpace::Const ||
+      CodeAddrSpace == NVPTX::AddressSpace::Param ||
+      (CodeAddrSpace == NVPTX::AddressSpace::Local
+      && (!N->isVolatile() || Ordering != AtomicOrdering::NotAtomic))) {
+    // Only non-atomic volatile local operations fall through past this point.
     return NVPTX::Ordering::NotAtomic;
   }
 
@@ -677,12 +679,13 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
   // atomics is undefined if the generic address does not refer to a .global or
   // .shared memory location.
-  bool AddrGenericOrGlobalOrShared =
+  bool AddrGenericOrGlobalOrSharedorLocal =
       (CodeAddrSpace == NVPTX::AddressSpace::Generic ||
        CodeAddrSpace == NVPTX::AddressSpace::Global ||
        CodeAddrSpace == NVPTX::AddressSpace::Shared ||
-       CodeAddrSpace == NVPTX::AddressSpace::SharedCluster);
-  if (!AddrGenericOrGlobalOrShared)
+       CodeAddrSpace == NVPTX::AddressSpace::SharedCluster ||
+       CodeAddrSpace == NVPTX::AddressSpace::Local);
+  if (!AddrGenericOrGlobalOrSharedorLocal)
     return NVPTX::Ordering::NotAtomic;
 
   bool UseRelaxedMMIO =
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index ed8f6b4511079..f53fc3a27de15 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -85,7 +85,7 @@ define i32 @test_modify_param(ptr byval([10 x i32]) %a, i32 %b, i32 %c ) {
 ; CHECK-NEXT:    mov.b64 %rd1, test_modify_param_param_0;
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_modify_param_param_1];
 ; CHECK-NEXT:    ld.param.b32 %r2, [test_modify_param_param_2];
-; CHECK-NEXT:    st.local.b32 [%rd1+2], %r1;
+; CHECK-NEXT:    st.volatile.local.b32 [%rd1+2], %r1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;
   %p2 = getelementptr i8, ptr %a, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
index bac59be5158ea..58ca8d613b09b 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
@@ -2643,9 +2643,9 @@ define void @local_volatile_i8(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_i8_param_0];
-; CHECK-NEXT:    ld.local.b8 %rs1, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b8 %rs1, [%rd1];
 ; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
-; CHECK-NEXT:    st.local.b8 [%rd1], %rs2;
+; CHECK-NEXT:    st.volatile.local.b8 [%rd1], %rs2;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile i8, ptr addrspace(5) %a
   %a.add = add i8 %a.load, 1
@@ -2661,9 +2661,9 @@ define void @local_volatile_i16(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_i16_param_0];
-; CHECK-NEXT:    ld.local.b16 %rs1, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b16 %rs1, [%rd1];
 ; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
-; CHECK-NEXT:    st.local.b16 [%rd1], %rs2;
+; CHECK-NEXT:    st.volatile.local.b16 [%rd1], %rs2;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile i16, ptr addrspace(5) %a
   %a.add = add i16 %a.load, 1
@@ -2679,9 +2679,9 @@ define void @local_volatile_i32(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_i32_param_0];
-; CHECK-NEXT:    ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b32 %r1, [%rd1];
 ; CHECK-NEXT:    add.s32 %r2, %r1, 1;
-; CHECK-NEXT:    st.local.b32 [%rd1], %r2;
+; CHECK-NEXT:    st.volatile.local.b32 [%rd1], %r2;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile i32, ptr addrspace(5) %a
   %a.add = add i32 %a.load, 1
@@ -2696,9 +2696,9 @@ define void @local_volatile_i64(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_i64_param_0];
-; CHECK-NEXT:    ld.local.b64 %rd2, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b64 %rd2, [%rd1];
 ; CHECK-NEXT:    add.s64 %rd3, %rd2, 1;
-; CHECK-NEXT:    st.local.b64 [%rd1], %rd3;
+; CHECK-NEXT:    st.volatile.local.b64 [%rd1], %rd3;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile i64, ptr addrspace(5) %a
   %a.add = add i64 %a.load, 1
@@ -2714,9 +2714,9 @@ define void @local_volatile_float(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_float_param_0];
-; CHECK-NEXT:    ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b32 %r1, [%rd1];
 ; CHECK-NEXT:    add.rn.f32 %r2, %r1, 0f3F800000;
-; CHECK-NEXT:    st.local.b32 [%rd1], %r2;
+; CHECK-NEXT:    st.volatile.local.b32 [%rd1], %r2;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile float, ptr addrspace(5) %a
   %a.add = fadd float %a.load, 1.
@@ -2731,9 +2731,9 @@ define void @local_volatile_double(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_double_param_0];
-; CHECK-NEXT:    ld.local.b64 %rd2, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b64 %rd2, [%rd1];
 ; CHECK-NEXT:    add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
-; CHECK-NEXT:    st.local.b64 [%rd1], %rd3;
+; CHECK-NEXT:    st.volatile.local.b64 [%rd1], %rd3;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile double, ptr addrspace(5) %a
   %a.add = fadd double %a.load, 1.
diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
index ed170e92917f5..91a70e4468154 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll
@@ -1550,7 +1550,6 @@ define void @shared_seq_cst_volatile_cluster(ptr addrspace(3) %a, ptr addrspace(
 }
 
 ;; local statespace
-
 ; CHECK-LABEL: local_unordered_cluster
 define void @local_unordered_cluster(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
 ; CHECK-LABEL: local_unordered_cluster(
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
index 68c53cde7f9ac..4423efcca1ff4 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
@@ -1280,11 +1280,11 @@ define void @local_volatile_32xi8(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_32xi8_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT:    ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [local_volatile_32xi8_param_1];
-; CHECK-NEXT:    st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT:    st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <32 x i8>, ptr addrspace(5) %a
   store volatile <32 x i8> %a.load, ptr addrspace(5) %b
@@ -1299,11 +1299,11 @@ define void @local_volatile_16xi16(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_16xi16_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT:    ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [local_volatile_16xi16_param_1];
-; CHECK-NEXT:    st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT:    st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <16 x i16>, ptr addrspace(5) %a
   store volatile <16 x i16> %a.load, ptr addrspace(5) %b
@@ -1318,11 +1318,11 @@ define void @local_volatile_16xhalf(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_16xhalf_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT:    ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [local_volatile_16xhalf_param_1];
-; CHECK-NEXT:    st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT:    st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <16 x half>, ptr addrspace(5) %a
   store volatile <16 x half> %a.load, ptr addrspace(5) %b
@@ -1337,11 +1337,11 @@ define void @local_volatile_16xbfloat(ptr addrspace(5) %a, ptr addrspace(5) %b)
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_16xbfloat_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT:    ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [local_volatile_16xbfloat_param_1];
-; CHECK-NEXT:    st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT:    st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <16 x bfloat>, ptr addrspace(5) %a
   store volatile <16 x bfloat> %a.load, ptr addrspace(5) %b
@@ -1356,11 +1356,11 @@ define void @local_volatile_8xi32(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_8xi32_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; CHECK-NEXT:    ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [local_volatile_8xi32_param_1];
-; CHECK-NEXT:    st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
-; CHECK-NEXT:    st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <8 x i32>, ptr addrspace(5) %a
   store volatile <8 x i32> %a.load, ptr addrspace(5) %b
@@ -1374,11 +1374,11 @@ define void @local_volatile_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xi64_param_0];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd6, [local_volatile_4xi64_param_1];
-; CHECK-NEXT:    st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT:    st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x i64>, ptr addrspace(5) %a
   store volatile <4 x i64> %a.load, ptr addrspace(5) %b
@@ -1392,11 +1392,11 @@ define void @local_volatile_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_8xfloat_param_0];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd6, [local_volatile_8xfloat_param_1];
-; CHECK-NEXT:    st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT:    st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <8 x float>, ptr addrspace(5) %a
   store volatile <8 x float> %a.load, ptr addrspace(5) %b
@@ -1410,11 +1410,11 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xdouble_param_0];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
 ; CHECK-NEXT:    ld.param.b64 %rd6, [local_volatile_4xdouble_param_1];
-; CHECK-NEXT:    st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
-; CHECK-NEXT:    st.local.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x double>, ptr addrspace(5) %a
   store volatile <4 x double> %a.load, ptr addrspace(5) %b
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
index 7e013390a39db..37e66894110ca 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll
@@ -2846,10 +2846,10 @@ define void @local_volatile_2xi8(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xi8_param_0];
-; CHECK-NEXT:    ld.local.v2.b8 {%rs1, %rs2}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b8 {%rs1, %rs2}, [%rd1];
 ; CHECK-NEXT:    add.s16 %rs3, %rs2, 1;
 ; CHECK-NEXT:    add.s16 %rs4, %rs1, 1;
-; CHECK-NEXT:    st.local.v2.b8 [%rd1], {%rs4, %rs3};
+; CHECK-NEXT:    st.volatile.local.v2.b8 [%rd1], {%rs4, %rs3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x i8>, ptr addrspace(5) %a
   %a.add = add <2 x i8> %a.load, <i8 1, i8 1>
@@ -2866,7 +2866,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xi8_param_0];
-; CHECK-NEXT:    ld.local.b32 %r1, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.b32 %r1, [%rd1];
 ; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 0x7773U;
 ; CHECK-NEXT:    cvt.u16.u32 %rs1, %r2;
 ; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
@@ -2886,7 +2886,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
 ; CHECK-NEXT:    cvt.u32.u16 %r10, %rs8;
 ; CHECK-NEXT:    prmt.b32 %r11, %r10, %r8, 0x3340U;
 ; CHECK-NEXT:    prmt.b32 %r12, %r11, %r6, 0x5410U;
-; CHECK-NEXT:    st.local.b32 [%rd1], %r12;
+; CHECK-NEXT:    st.volatile.local.b32 [%rd1], %r12;
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x i8>, ptr addrspace(5) %a
   %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
@@ -2903,7 +2903,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_8xi8_param_0];
-; CHECK-NEXT:    ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
 ; CHECK-NEXT:    prmt.b32 %r3, %r2, 0, 0x7773U;
 ; CHECK-NEXT:    cvt.u16.u32 %rs1, %r3;
 ; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
@@ -2942,7 +2942,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) {
 ; CHECK-NEXT:    cvt.u32.u16 %r22, %rs16;
 ; CHECK-NEXT:    prmt.b32 %r23, %r22, %r20, 0x3340U;
 ; CHECK-NEXT:    prmt.b32 %r24, %r23, %r18, 0x5410U;
-; CHECK-NEXT:    st.local.v2.b32 [%rd1], {%r24, %r13};
+; CHECK-NEXT:    st.volatile.local.v2.b32 [%rd1], {%r24, %r13};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <8 x i8>, ptr addrspace(5) %a
   %a.add = add <8 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -2959,7 +2959,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_16xi8_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
 ; CHECK-NEXT:    prmt.b32 %r5, %r4, 0, 0x7773U;
 ; CHECK-NEXT:    cvt.u16.u32 %rs1, %r5;
 ; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
@@ -3036,7 +3036,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) {
 ; CHECK-NEXT:    cvt.u32.u16 %r46, %rs32;
 ; CHECK-NEXT:    prmt.b32 %r47, %r46, %r44, 0x3340U;
 ; CHECK-NEXT:    prmt.b32 %r48, %r47, %r42, 0x5410U;
-; CHECK-NEXT:    st.local.v4.b32 [%rd1], {%r48, %r37, %r26, %r15};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd1], {%r48, %r37, %r26, %r15};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <16 x i8>, ptr addrspace(5) %a
   %a.add = add <16 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -3052,10 +3052,10 @@ define void @local_volatile_2xi16(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xi16_param_0];
-; CHECK-NEXT:    ld.local.v2.b16 {%rs1, %rs2}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b16 {%rs1, %rs2}, [%rd1];
 ; CHECK-NEXT:    add.s16 %rs3, %rs2, 1;
 ; CHECK-NEXT:    add.s16 %rs4, %rs1, 1;
-; CHECK-NEXT:    st.local.v2.b16 [%rd1], {%rs4, %rs3};
+; CHECK-NEXT:    st.volatile.local.v2.b16 [%rd1], {%rs4, %rs3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x i16>, ptr addrspace(5) %a
   %a.add = add <2 x i16> %a.load, <i16 1, i16 1>
@@ -3071,12 +3071,12 @@ define void @local_volatile_4xi16(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xi16_param_0];
-; CHECK-NEXT:    ld.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
 ; CHECK-NEXT:    add.s16 %rs5, %rs4, 1;
 ; CHECK-NEXT:    add.s16 %rs6, %rs3, 1;
 ; CHECK-NEXT:    add.s16 %rs7, %rs2, 1;
 ; CHECK-NEXT:    add.s16 %rs8, %rs1, 1;
-; CHECK-NEXT:    st.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
+; CHECK-NEXT:    st.volatile.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x i16>, ptr addrspace(5) %a
   %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
@@ -3093,7 +3093,7 @@ define void @local_volatile_8xi16(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_8xi16_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
 ; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
 ; CHECK-NEXT:    add.s16 %rs3, %rs2, 1;
 ; CHECK-NEXT:    add.s16 %rs4, %rs1, 1;
@@ -3110,7 +3110,7 @@ define void @local_volatile_8xi16(ptr addrspace(5) %a) {
 ; CHECK-NEXT:    add.s16 %rs15, %rs14, 1;
 ; CHECK-NEXT:    add.s16 %rs16, %rs13, 1;
 ; CHECK-NEXT:    mov.b32 %r8, {%rs16, %rs15};
-; CHECK-NEXT:    st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <8 x i16>, ptr addrspace(5) %a
   %a.add = add <8 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -3126,10 +3126,10 @@ define void @local_volatile_2xi32(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xi32_param_0];
-; CHECK-NEXT:    ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
 ; CHECK-NEXT:    add.s32 %r3, %r2, 1;
 ; CHECK-NEXT:    add.s32 %r4, %r1, 1;
-; CHECK-NEXT:    st.local.v2.b32 [%rd1], {%r4, %r3};
+; CHECK-NEXT:    st.volatile.local.v2.b32 [%rd1], {%r4, %r3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x i32>, ptr addrspace(5) %a
   %a.add = add <2 x i32> %a.load, <i32 1, i32 1>
@@ -3145,12 +3145,12 @@ define void @local_volatile_4xi32(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xi32_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
 ; CHECK-NEXT:    add.s32 %r5, %r4, 1;
 ; CHECK-NEXT:    add.s32 %r6, %r3, 1;
 ; CHECK-NEXT:    add.s32 %r7, %r2, 1;
 ; CHECK-NEXT:    add.s32 %r8, %r1, 1;
-; CHECK-NEXT:    st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x i32>, ptr addrspace(5) %a
   %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
@@ -3165,10 +3165,10 @@ define void @local_volatile_2xi64(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xi64_param_0];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
 ; CHECK-NEXT:    add.s64 %rd4, %rd3, 1;
 ; CHECK-NEXT:    add.s64 %rd5, %rd2, 1;
-; CHECK-NEXT:    st.local.v2.b64 [%rd1], {%rd5, %rd4};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x i64>, ptr addrspace(5) %a
   %a.add = add <2 x i64> %a.load, <i64 1, i64 1>
@@ -3184,10 +3184,10 @@ define void @local_volatile_2xfloat(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xfloat_param_0];
-; CHECK-NEXT:    ld.local.v2.b32 {%r1, %r2}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1];
 ; CHECK-NEXT:    add.rn.f32 %r3, %r2, 0f3F800000;
 ; CHECK-NEXT:    add.rn.f32 %r4, %r1, 0f3F800000;
-; CHECK-NEXT:    st.local.v2.b32 [%rd1], {%r4, %r3};
+; CHECK-NEXT:    st.volatile.local.v2.b32 [%rd1], {%r4, %r3};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x float>, ptr addrspace(5) %a
   %a.add = fadd <2 x float> %a.load, <float 1., float 1.>
@@ -3203,12 +3203,12 @@ define void @local_volatile_4xfloat(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_4xfloat_param_0];
-; CHECK-NEXT:    ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
 ; CHECK-NEXT:    add.rn.f32 %r5, %r4, 0f3F800000;
 ; CHECK-NEXT:    add.rn.f32 %r6, %r3, 0f3F800000;
 ; CHECK-NEXT:    add.rn.f32 %r7, %r2, 0f3F800000;
 ; CHECK-NEXT:    add.rn.f32 %r8, %r1, 0f3F800000;
-; CHECK-NEXT:    st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
+; CHECK-NEXT:    st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <4 x float>, ptr addrspace(5) %a
   %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
@@ -3223,10 +3223,10 @@ define void @local_volatile_2xdouble(ptr addrspace(5) %a) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [local_volatile_2xdouble_param_0];
-; CHECK-NEXT:    ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
 ; CHECK-NEXT:    add.rn.f64 %rd4, %rd3, 0d3FF0000000000000;
 ; CHECK-NEXT:    add.rn.f64 %rd5, %rd2, 0d3FF0000000000000;
-; CHECK-NEXT:    st.local.v2.b64 [%rd1], {%rd5, %rd4};
+; CHECK-NEXT:    st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4};
 ; CHECK-NEXT:    ret;
   %a.load = load volatile <2 x double>, ptr addrspace(5) %a
   %a.add = fadd <2 x double> %a.load, <double 1., double 1.>
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 5c3017310d0a3..5b8018d8b32a7 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -18,7 +18,7 @@ define void @foo(i32 %a) {
 ; PTX32-NEXT:    mov.b32 %SPL, __local_depot0;
 ; PTX32-NEXT:    ld.param.b32 %r1, [foo_param_0];
 ; PTX32-NEXT:    add.u32 %r3, %SPL, 0;
-; PTX32-NEXT:    st.local.b32 [%r3], %r1;
+; PTX32-NEXT:    st.volatile.local.b32 [%r3], %r1;
 ; PTX32-NEXT:    ret;
 ;
 ; PTX64-LABEL: foo(
@@ -33,7 +33,7 @@ define void @foo(i32 %a) {
 ; PTX64-NEXT:    mov.b64 %SPL, __local_depot0;
 ; PTX64-NEXT:    ld.param.b32 %r1, [foo_param_0];
 ; PTX64-NEXT:    add.u64 %rd2, %SPL, 0;
-; PTX64-NEXT:    st.local.b32 [%rd2], %r1;
+; PTX64-NEXT:    st.volatile.local.b32 [%rd2], %r1;
 ; PTX64-NEXT:    ret;
   %local = alloca i32, align 4
   store volatile i32 %a, ptr %local



More information about the llvm-commits mailing list