[llvm] Check for side effects when lowering target intrinsics, update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (PR #98968)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 14:46:59 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-llvm-ir
Author: Kevin McAfee (kalxr)
<details>
<summary>Changes</summary>
Check for mayHaveSideEffects when lowering target intrinsics to avoid creating unused chains that are then erroneously eliminated. Without this check intrinsics with side effects (such as a those with !WillReturn) can be eliminated from the SDAG. This is inconsistent with behavior at the LLVM IR level where instructions that may have side effects are considered unsafe to remove.
Consider the NVVM ldu/ldg intrinsics and test updated in this patch for an illustration of the inconsistency we are resolving. The new testcases check that these intrinsics are removed when their results are unused. As they are loads with no side effects, this is desirable. Before the intrinsic and SDAG changes in this patch, these tests would have passed as the intrinsic instructions would have been removed at the SDAG level. They would not have been removed at the IR level as they were not IntrWillReturn. When we make the SDAG change alone, these tests fail, as the intrinsics are still not IntrWillReturn and thus are not safe to remove at either the IR or SDAG level. Thus we update the intrinsics with IntrWIllReturn, which combined with their other attributes makes them free of side-effects and safe to remove at both levels.
---
Full diff: https://github.com/llvm/llvm-project/pull/98968.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+6-6)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/ldu-ldg.ll (+126)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.p">;
// Use for generic pointers
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b0746014daf5a..2fd12c7c0b1bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
- bool OnlyLoad = HasChain && F->onlyReadsMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index a449a1b1f713c..9c266c78f5ac3 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
+
+; CHECK-LABEL: test_ldu_i8_dead
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u8
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i16_dead
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i32_dead
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_i64_dead
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_p_dead
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u64
+ %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f32_dead
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f32
+ %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f64_dead
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.f64
+ %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_f16_dead
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u16
+ %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldu_v2f16_dead
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ldu.global.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i8_dead
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u8
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i16_dead
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i32_dead
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_i64_dead
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_p_dead
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u64
+ %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f32_dead
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f32
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f64_dead
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.f64
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_f16_dead
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u16
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+ ret void
+}
+
+; CHECK-LABEL: test_ldg_v2f16_dead
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+ ; CHECK-NOT: ld.global.nc.u32
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/98968
More information about the llvm-commits
mailing list