[llvm] Add more cases for computeOverflowForSignedAdd (PR #99900)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 10:41:15 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/99900
>From f4d2cfd04d140b3f6d1c7f3a1d0740ffc057a431 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 22 Jul 2024 12:39:08 -0400
Subject: [PATCH] Add more cases for computeOverflowForSignedAdd for
ValueTracking
Even with fwrapv, IR knows this does not wrap:
https://alive2.llvm.org/ce/z/jsbr78
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 +++++++--
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 31 ++++++++++++-------
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 31 ++++++++++++-------
3 files changed, 53 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 02d44cd36ae53..aed6b30fa0c11 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4242,8 +4242,19 @@ SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
return OFK_Never;
- // TODO: Add ConstantRange::signedAddMayOverflow handling.
- return OFK_Sometime;
+ // smulhi + any value never overflow
+ if (N0.getOpcode() == ISD::SMUL_LOHI && N0.getResNo() == 1)
+ return OFK_Never;
+
+ if (N1.getOpcode() == ISD::SMUL_LOHI && N1.getResNo() == 1)
+ return OFK_Never;
+
+ // Fallback to ConstantRange::signedAddMayOverflow handling.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+ return mapOverflowResult(N0Range.signedAddMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
index 03f94d6e853f0..419e19083f85e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
@@ -5,7 +5,8 @@
define amdgpu_kernel void @raw_atomic_buffer_load_i32(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB0_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -33,7 +34,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_i32_off(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_i32_off:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -60,7 +62,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_i32_soff(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_i32_soff:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB2_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -87,7 +90,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_i32_dlc(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_i32_dlc:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB3_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -115,7 +119,8 @@ bb2:
define amdgpu_kernel void @raw_nonatomic_buffer_load_i32(<4 x i32> %addr) {
; CHECK-LABEL: raw_nonatomic_buffer_load_i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_load_b32 v1, off, s[0:3], 0 offset:4 glc
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -144,8 +149,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_i64(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_i64:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB5_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -174,7 +179,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_v2i16(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_v2i16:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB6_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -203,7 +209,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_v4i16(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_v4i16:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB7_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -236,7 +243,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_v4i32(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_v4i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB8_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -265,7 +273,8 @@ bb2:
define amdgpu_kernel void @raw_atomic_buffer_load_ptr(<4 x i32> %addr) {
; CHECK-LABEL: raw_atomic_buffer_load_ptr:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB9_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
index 3228335073d07..6541ac9553231 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
@@ -5,7 +5,8 @@
define amdgpu_kernel void @raw_ptr_atomic_buffer_ptr_load_i32(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_ptr_load_i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB0_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -33,7 +34,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_off(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_off:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -60,7 +62,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_soff(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_soff:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB2_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -87,7 +90,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_dlc(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_dlc:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB3_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -115,7 +119,8 @@ bb2:
define amdgpu_kernel void @raw_nonptr_atomic_buffer_load_i32(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_nonptr_atomic_buffer_load_i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_load_b32 v1, off, s[0:3], 0 offset:4 glc
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -144,8 +149,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i64(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_i64:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB5_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -174,7 +179,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v2i16(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_v2i16:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB6_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -203,7 +209,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v4i16(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_v4i16:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB7_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -236,7 +243,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v4i32(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_v4i32:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB8_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -265,7 +273,8 @@ bb2:
define amdgpu_kernel void @raw_ptr_atomic_buffer_load_ptr(ptr addrspace(8) %ptr) {
; CHECK-LABEL: raw_ptr_atomic_buffer_load_ptr:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB9_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
More information about the llvm-commits
mailing list