[llvm] Check for side effects when lowering target intrinsics, update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (PR #98968)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 15 14:46:29 PDT 2024


https://github.com/kalxr created https://github.com/llvm/llvm-project/pull/98968

Check for mayHaveSideEffects when lowering target intrinsics to avoid creating unused chains that are then erroneously eliminated. Without this check, intrinsics with side effects (such as those with !WillReturn) can be eliminated from the SDAG. This is inconsistent with behavior at the LLVM IR level, where instructions that may have side effects are considered unsafe to remove.

Consider the NVVM ldu/ldg intrinsics and tests updated in this patch for an illustration of the inconsistency we are resolving. The new test cases check that these intrinsics are removed when their results are unused. As they are loads with no side effects, this is desirable. Before the intrinsic and SDAG changes in this patch, these tests would have passed, as the intrinsic instructions would have been removed at the SDAG level. They would not have been removed at the IR level, as they were not IntrWillReturn. When we make the SDAG change alone, these tests fail, as the intrinsics are still not IntrWillReturn and thus are not safe to remove at either the IR or SDAG level. Thus we update the intrinsics with IntrWillReturn, which, combined with their other attributes, makes them free of side effects and safe to remove at both levels.

>From d1b385591bd6b5e4f751ee183b2f64f025a7b0c7 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 5 Jul 2024 15:43:24 -0700
Subject: [PATCH] Check for side effects when lowering target intrinsics,
 update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE

---
 llvm/include/llvm/IR/IntrinsicsNVVM.td        |  12 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   2 +-
 llvm/test/CodeGen/NVPTX/ldu-ldg.ll            | 126 ++++++++++++++++++
 3 files changed, 133 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
 // pointer's alignment.
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.p">;
 
 // Generated within nvvm. Use for ldg on sm_35 or later.  Second arg is the
 // pointer's alignment.
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
   [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.p">;
 
 // Use for generic pointers
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b0746014daf5a..2fd12c7c0b1bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // definition.
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
-  bool OnlyLoad = HasChain && F->onlyReadsMemory();
+  bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects();
 
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index a449a1b1f713c..9c266c78f5ac3 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
   %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
   ret <2 x half> %val
 }
+
+; CHECK-LABEL: test_ldu_i8_dead
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u8
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i16_dead
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i32_dead
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u32
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_i64_dead
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_p_dead
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u64
+  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f32_dead
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f64_dead
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_f16_dead
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldu_v2f16_dead
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ldu.global.u32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i8_dead
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u8
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i16_dead
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i32_dead
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u32
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_i64_dead
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u64
+  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_p_dead
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u64
+  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f32_dead
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.f32
+  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f64_dead
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.f64
+  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_f16_dead
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u16
+  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+; CHECK-LABEL: test_ldg_v2f16_dead
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+  ; CHECK-NOT: ld.global.nc.u32
+  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}



More information about the llvm-commits mailing list