[llvm] Add more cases for computeOverflowForSignedAdd (PR #99900)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 22 10:41:15 PDT 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/99900

>From f4d2cfd04d140b3f6d1c7f3a1d0740ffc057a431 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 22 Jul 2024 12:39:08 -0400
Subject: [PATCH] Add more cases for computeOverflowForSignedAdd for
 ValueTracking

Even with fwrapv, IR knows this does not wrap:

https://alive2.llvm.org/ce/z/jsbr78
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 +++++++--
 .../llvm.amdgcn.raw.atomic.buffer.load.ll     | 31 ++++++++++++-------
 .../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 31 ++++++++++++-------
 3 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 02d44cd36ae53..aed6b30fa0c11 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4242,8 +4242,19 @@ SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
   if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
     return OFK_Never;
 
-  // TODO: Add ConstantRange::signedAddMayOverflow handling.
-  return OFK_Sometime;
+  // smulhi + any value never overflow
+  if (N0.getOpcode() == ISD::SMUL_LOHI && N0.getResNo() == 1)
+    return OFK_Never;
+
+  if (N1.getOpcode() == ISD::SMUL_LOHI && N1.getResNo() == 1)
+    return OFK_Never;
+
+  // Fallback to ConstantRange::signedAddMayOverflow handling.
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+  return mapOverflowResult(N0Range.signedAddMayOverflow(N1Range));
 }
 
 SelectionDAG::OverflowKind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
index 03f94d6e853f0..419e19083f85e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
@@ -5,7 +5,8 @@
 define amdgpu_kernel void @raw_atomic_buffer_load_i32(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB0_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -33,7 +34,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_i32_off(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_i32_off:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB1_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -60,7 +62,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_i32_soff(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_i32_soff:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB2_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -87,7 +90,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_i32_dlc(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_i32_dlc:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB3_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -115,7 +119,8 @@ bb2:
 define amdgpu_kernel void @raw_nonatomic_buffer_load_i32(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_nonatomic_buffer_load_i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    buffer_load_b32 v1, off, s[0:3], 0 offset:4 glc
 ; CHECK-NEXT:    s_mov_b32 s0, 0
@@ -144,8 +149,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_i64(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_i64:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB5_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -174,7 +179,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_v2i16(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_v2i16:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB6_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -203,7 +209,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_v4i16(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_v4i16:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB7_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -236,7 +243,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_v4i32(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_v4i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB8_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -265,7 +273,8 @@ bb2:
 define amdgpu_kernel void @raw_atomic_buffer_load_ptr(<4 x i32> %addr) {
 ; CHECK-LABEL: raw_atomic_buffer_load_ptr:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB9_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
index 3228335073d07..6541ac9553231 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
@@ -5,7 +5,8 @@
 define amdgpu_kernel void @raw_ptr_atomic_buffer_ptr_load_i32(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_ptr_load_i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB0_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -33,7 +34,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_off(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_off:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB1_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -60,7 +62,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_soff(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_soff:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB2_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -87,7 +90,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i32_dlc(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_i32_dlc:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB3_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -115,7 +119,8 @@ bb2:
 define amdgpu_kernel void @raw_nonptr_atomic_buffer_load_i32(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_nonptr_atomic_buffer_load_i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    buffer_load_b32 v1, off, s[0:3], 0 offset:4 glc
 ; CHECK-NEXT:    s_mov_b32 s0, 0
@@ -144,8 +149,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_i64(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_i64:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB5_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -174,7 +179,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v2i16(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_v2i16:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB6_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -203,7 +209,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v4i16(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_v4i16:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB7_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -236,7 +243,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_v4i32(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_v4i32:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB8_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -265,7 +273,8 @@ bb2:
 define amdgpu_kernel void @raw_ptr_atomic_buffer_load_ptr(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: raw_ptr_atomic_buffer_load_ptr:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  .LBB9_1: ; %bb1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1



More information about the llvm-commits mailing list