[llvm] Check for side effects when lowering target intrinsics, update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (PR #98968)
Kevin McAfee via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 10:51:42 PDT 2024
https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/98968
>From d1b385591bd6b5e4f751ee183b2f64f025a7b0c7 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 5 Jul 2024 15:43:24 -0700
Subject: [PATCH 1/2] Check for side effects when lowering target intrinsics,
update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE
---
llvm/include/llvm/IR/IntrinsicsNVVM.td | 12 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 ++++++++++++++++++
3 files changed, 133 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.p">;
// Use for generic pointers
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b0746014daf5a..2fd12c7c0b1bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index a449a1b1f713c..9c266c78f5ac3 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
+
+; CHECK-LABEL: test_ldu_i8_dead
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u8
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i16_dead
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i32_dead
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i64_dead
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_p_dead
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f32_dead
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f32
+ %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f64_dead
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f64
+ %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f16_dead
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_v2f16_dead
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i8_dead
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u8
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i16_dead
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i32_dead
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i64_dead
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_p_dead
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f32_dead
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f32
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f64_dead
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f64
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f16_dead
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_v2f16_dead
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
>From 5666ebd40633986fc6125ca234b32689d4407340 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 19 Jul 2024 10:51:16 -0700
Subject: [PATCH 2/2] Use function attributes rather than instruction
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2fd12c7c0b1bd..923ef3928b341 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
More information about the llvm-commits
mailing list