[llvm] Update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (PR #98968)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 11:07:24 PDT 2024


https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/98968

>From d1b385591bd6b5e4f751ee183b2f64f025a7b0c7 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 5 Jul 2024 15:43:24 -0700
Subject: [PATCH 1/6] Check for side effects when lowering target intrinsics,
 update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE

---
 llvm/include/llvm/IR/IntrinsicsNVVM.td        |  12 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   2 +-
 llvm/test/CodeGen/NVPTX/ldu-ldg.ll            | 126 ++++++++++++++++++
 3 files changed, 133 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
 // pointer's alignment.
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.p">;
 
 // Generated within nvvm. Use for ldg on sm_35 or later.  Second arg is the
 // pointer's alignment.
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.p">;
 
 // Use for generic pointers
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b0746014daf5a..2fd12c7c0b1bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // definition.
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
-  bool OnlyLoad = HasChain && F->onlyReadsMemory();
+  bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
 
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index a449a1b1f713c..9c266c78f5ac3 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
   %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
   ret <2 x half> %val
 }
+
+; CHECK-LABEL: test_ldu_i8_dead
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u8
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i16_dead
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i32_dead
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u32
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i64_dead
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_p_dead
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u64
+  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f32_dead
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f64_dead
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f16_dead
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_v2f16_dead
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i8_dead
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u8
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i16_dead
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i32_dead
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u32
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i64_dead
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u64
+  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_p_dead
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u64
+  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f32_dead
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.f32
+  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f64_dead
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.f64
+  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f16_dead
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u16
+  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_v2f16_dead
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u32
+  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}

>From 5666ebd40633986fc6125ca234b32689d4407340 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 19 Jul 2024 10:51:16 -0700
Subject: [PATCH 2/6] Use function attributes rather than instruction

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2fd12c7c0b1bd..923ef3928b341 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // definition.
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
-  bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
+  bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn();
 
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;

>From 84c32954f392c91b3ed09264c77ed888113a2a60 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 14:52:26 -0700
Subject: [PATCH 3/6] No SDAG change

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 923ef3928b341..b0746014daf5a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // definition.
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
-  bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn();
+  bool OnlyLoad = HasChain && F->onlyReadsMemory();
 
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;

>From a252b0d4022c303d19dc638d2dedd713615b7baf Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 15:11:38 -0700
Subject: [PATCH 4/6] Revert ldu-ldg.ll

---
 llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 -----------------------------
 1 file changed, 126 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index 9c266c78f5ac3..a449a1b1f713c 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,129 +148,3 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
   %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
   ret <2 x half> %val
 }
-
-; CHECK-LABEL: test_ldu_i8_dead
-define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u8
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_i16_dead
-define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u16
-  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_i32_dead
-define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u32
-  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_i64_dead
-define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u64
-  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_p_dead
-define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u64
-  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_f32_dead
-define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.f32
-  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_f64_dead
-define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.f64
-  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_f16_dead
-define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u16
-  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-; CHECK-LABEL: test_ldu_v2f16_dead
-define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ldu.global.u32
-  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_i8_dead
-define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u8
-  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_i16_dead
-define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_i32_dead
-define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u32
-  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_i64_dead
-define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u64
-  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_p_dead
-define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u64
-  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_f32_dead
-define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.f32
-  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_f64_dead
-define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.f64
-  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_f16_dead
-define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u16
-  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-; CHECK-LABEL: test_ldg_v2f16_dead
-define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
-  ; CHECK-NOT: ld.global.nc.u32
-  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}

>From 7f8f4f37a4de247a7eb61626b8904ee045df3179 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 15:13:26 -0700
Subject: [PATCH 5/6] Add new test to check that nvvm ldu/ldg intrinsics are
 DCE'd

---
 llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll

diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
new file mode 100644
index 0000000000000..bc20b325d3dbb
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
+; them from being eliminated at IR level when dead.
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i8_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_p_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i8_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_p_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}

>From f66ce52e71d4f1087f1fb7759343bcf9363d480e Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 23 Jul 2024 11:07:10 -0700
Subject: [PATCH 6/6] Update test to run only DCE pass

---
 .../DCE/nvvm-ldu-ldg-willreturn.ll}           | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)
 rename llvm/test/{CodeGen/NVPTX/ldu-ldg-willreturn.ll => Transforms/DCE/nvvm-ldu-ldg-willreturn.ll} (76%)

diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
similarity index 76%
rename from llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
rename to llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
index bc20b325d3dbb..64a023ef45137 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
+++ b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -O3 -S | FileCheck %s
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
 
 ; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
 ; them from being eliminated at IR level when dead.
@@ -26,7 +26,7 @@ declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %
 
 define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_i8_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
@@ -35,7 +35,7 @@ define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_i16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -44,7 +44,7 @@ define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_i32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -53,7 +53,7 @@ define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_i64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -62,7 +62,7 @@ define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_p_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
@@ -71,7 +71,7 @@ define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_f32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -80,7 +80,7 @@ define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_f64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -89,7 +89,7 @@ define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -98,7 +98,7 @@ define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldu_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
@@ -107,7 +107,7 @@ define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_i8_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
@@ -116,7 +116,7 @@ define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_i16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -125,7 +125,7 @@ define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_i32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -134,7 +134,7 @@ define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_i64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -143,7 +143,7 @@ define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_p_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
@@ -152,7 +152,7 @@ define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_f32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -161,7 +161,7 @@ define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_f64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -170,7 +170,7 @@ define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -179,7 +179,7 @@ define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
 
 define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define void @test_ldg_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
   %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)



More information about the llvm-commits mailing list