[llvm] Update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (PR #98968)
Kevin McAfee via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 11:07:24 PDT 2024
https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/98968
>From d1b385591bd6b5e4f751ee183b2f64f025a7b0c7 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 5 Jul 2024 15:43:24 -0700
Subject: [PATCH 1/6] Check for side effects when lowering target intrinsics,
update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE
---
llvm/include/llvm/IR/IntrinsicsNVVM.td | 12 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 ++++++++++++++++++
3 files changed, 133 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.p">;
// Use for generic pointers
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b0746014daf5a..2fd12c7c0b1bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index a449a1b1f713c..9c266c78f5ac3 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
+
+; CHECK-LABEL: test_ldu_i8_dead
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u8
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i16_dead
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i32_dead
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i64_dead
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_p_dead
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f32_dead
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f32
+ %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f64_dead
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f64
+ %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f16_dead
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_v2f16_dead
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i8_dead
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u8
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i16_dead
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i32_dead
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i64_dead
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_p_dead
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f32_dead
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f32
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f64_dead
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f64
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f16_dead
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_v2f16_dead
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
>From 5666ebd40633986fc6125ca234b32689d4407340 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 19 Jul 2024 10:51:16 -0700
Subject: [PATCH 2/6] Use function attributes rather than instruction
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2fd12c7c0b1bd..923ef3928b341 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
>From 84c32954f392c91b3ed09264c77ed888113a2a60 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 14:52:26 -0700
Subject: [PATCH 3/6] No SDAG change
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 923ef3928b341..b0746014daf5a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
>From a252b0d4022c303d19dc638d2dedd713615b7baf Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 15:11:38 -0700
Subject: [PATCH 4/6] Revert ldu-ldg.ll
---
llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 -----------------------------
1 file changed, 126 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index 9c266c78f5ac3..a449a1b1f713c 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,129 +148,3 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
-
-; CHECK-LABEL: test_ldu_i8_dead
-define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u8
- %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_i16_dead
-define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u16
- %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_i32_dead
-define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u32
- %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_i64_dead
-define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u64
- %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_p_dead
-define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u64
- %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_f32_dead
-define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.f32
- %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_f64_dead
-define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.f64
- %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_f16_dead
-define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u16
- %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
- ret void
-}
-
-; CHECK-LABEL: test_ldu_v2f16_dead
-define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ldu.global.u32
- %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_i8_dead
-define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u8
- %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_i16_dead
-define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u16
- %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_i32_dead
-define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u32
- %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_i64_dead
-define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u64
- %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_p_dead
-define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u64
- %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_f32_dead
-define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.f32
- %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_f64_dead
-define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.f64
- %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_f16_dead
-define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u16
- %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
- ret void
-}
-
-; CHECK-LABEL: test_ldg_v2f16_dead
-define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
- ; CHECK-NOT: ld.global.nc.u32
- %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
- ret void
-}
>From 7f8f4f37a4de247a7eb61626b8904ee045df3179 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 22 Jul 2024 15:13:26 -0700
Subject: [PATCH 5/6] Add new test to check that nvvm ldu/ldg intrinsics are
DCE'd
---
llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll | 187 ++++++++++++++++++
1 file changed, 187 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
new file mode 100644
index 0000000000000..bc20b325d3dbb
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
+; them from being eliminated at IR level when dead.
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i8_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_p_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i8_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_p_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f32_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f64_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
>From f66ce52e71d4f1087f1fb7759343bcf9363d480e Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 23 Jul 2024 11:07:10 -0700
Subject: [PATCH 6/6] Update test to run only DCE pass
---
.../DCE/nvvm-ldu-ldg-willreturn.ll} | 38 +++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
rename llvm/test/{CodeGen/NVPTX/ldu-ldg-willreturn.ll => Transforms/DCE/nvvm-ldu-ldg-willreturn.ll} (76%)
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
similarity index 76%
rename from llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
rename to llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
index bc20b325d3dbb..64a023ef45137 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll
+++ b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -O3 -S | FileCheck %s
+; RUN: opt -S < %s -passes=dce | FileCheck %s
; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
; them from being eliminated at IR level when dead.
@@ -26,7 +26,7 @@ declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %
define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_i8_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
@@ -35,7 +35,7 @@ define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_i16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -44,7 +44,7 @@ define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_i32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -53,7 +53,7 @@ define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_i64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -62,7 +62,7 @@ define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_p_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
@@ -71,7 +71,7 @@ define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_f32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -80,7 +80,7 @@ define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_f64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -89,7 +89,7 @@ define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -98,7 +98,7 @@ define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldu_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
@@ -107,7 +107,7 @@ define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_i8_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
@@ -116,7 +116,7 @@ define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_i16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -125,7 +125,7 @@ define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_i32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -134,7 +134,7 @@ define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_i64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -143,7 +143,7 @@ define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_p_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
@@ -152,7 +152,7 @@ define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_f32_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
@@ -161,7 +161,7 @@ define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_f64_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
@@ -170,7 +170,7 @@ define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
@@ -179,7 +179,7 @@ define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @test_ldg_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: ret void
;
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
More information about the llvm-commits mailing list