[llvm] subOverflow (PR #67890)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 30 14:10:34 PDT 2023
https://github.com/elhewaty created https://github.com/llvm/llvm-project/pull/67890
- Add tests for computeOverflowFor*Sub functions
- Extend the computeOverflowForSignedSub/computeOverflowForUnsignedSub implementations to use ConstantRange
>From 59440285f4fc1db7b22425e4f64e7b52fe25ef97 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Sat, 30 Sep 2023 22:18:39 +0300
Subject: [PATCH 1/2] Add tests for computeOverflowFor*Sub functions
---
llvm/test/CodeGen/X86/combine-subo.ll | 81 +++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
diff --git a/llvm/test/CodeGen/X86/combine-subo.ll b/llvm/test/CodeGen/X86/combine-subo.ll
index 6965f6d7af27b53..049a12a0cabd651 100644
--- a/llvm/test/CodeGen/X86/combine-subo.ll
+++ b/llvm/test/CodeGen/X86/combine-subo.ll
@@ -4,9 +4,14 @@
declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
+
declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
+declare { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8>) nounwind readnone
; fold (ssub x, 0) -> x
define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
@@ -148,3 +153,79 @@ define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
%4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
ret <4 x i32> %4
}
+
+define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) {
+; CHECK-LABEL: combine_usub_nuw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: orl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF
+; CHECK-NEXT: subl %esi, %eax
+; CHECK-NEXT: setb %dl
+; CHECK-NEXT: retq
+ %aa = or i32 %a, 2147483648
+ %bb = and i32 %b, 2147483647
+ %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i8, i1 } @usub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: usub_always_overflow:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orb $64, %dil
+; CHECK-NEXT: movb $63, %al
+; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: setb %dl
+; CHECK-NEXT: retq
+ %y = or i8 %x, 64
+ %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: ssub_always_overflow:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpb $30, %dil
+; CHECK-NEXT: movl $29, %ecx
+; CHECK-NEXT: cmovgel %edi, %ecx
+; CHECK-NEXT: movb $-100, %al
+; CHECK-NEXT: subb %cl, %al
+; CHECK-NEXT: seto %dl
+; CHECK-NEXT: retq
+ %c = icmp sgt i8 %x, 29
+ %y = select i1 %c, i8 %x, i8 29
+ %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y)
+ ret { i8, i1 } %a
+}
+
+define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
+; SSE-LABEL: always_usub_const_vector:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: always_usub_const_vector:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: retq
+ %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
+ ret { <4 x i8>, <4 x i1> } %x
+}
+
+define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind {
+; SSE-LABEL: never_usub_const_vector:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = <127,255,0,254,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: never_usub_const_vector:
+; AVX: # %bb.0:
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [127,255,0,254,127,255,0,254,127,255,0,254,127,255,0,254]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: retq
+ %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>)
+ ret { <4 x i8>, <4 x i1> } %x
+}
>From c977b01e75982e9970a3696988b9095b40867519 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Sun, 1 Oct 2023 00:03:02 +0300
Subject: [PATCH 2/2] extend the computeOverflowForSignedSub/computeOverflowForUnsignedSub implementations with ConstantRange
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 14 ++++++++++----
llvm/test/CodeGen/X86/combine-subo.ll | 2 +-
llvm/test/CodeGen/X86/or-with-overflow.ll | 12 +++---------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cd21af770e1a4d9..0a61920b7c079ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4091,8 +4091,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
return OFK_Never;
- // TODO: Add ConstantRange::signedSubMayOverflow handling.
- return OFK_Sometime;
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+ return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
@@ -4101,8 +4104,11 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
if (isNullConstant(N1))
return OFK_Never;
- // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
- return OFK_Sometime;
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
diff --git a/llvm/test/CodeGen/X86/combine-subo.ll b/llvm/test/CodeGen/X86/combine-subo.ll
index 049a12a0cabd651..99f26525d49e5b5 100644
--- a/llvm/test/CodeGen/X86/combine-subo.ll
+++ b/llvm/test/CodeGen/X86/combine-subo.ll
@@ -161,7 +161,7 @@ define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) {
; CHECK-NEXT: orl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF
; CHECK-NEXT: subl %esi, %eax
-; CHECK-NEXT: setb %dl
+; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retq
%aa = or i32 %a, 2147483648
%bb = and i32 %b, 2147483647
diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index 4440485af54bbaa..b3ffa209bc7004e 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -161,19 +161,13 @@ define i32 @or_i32_rr(i32 %0, i32 %1) {
define i64 @or_i64_ri(i64 %0, i64 %1) nounwind {
; X86-LABEL: or_i64_ri:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $17, %ecx
-; X86-NEXT: cmpl $1, %ecx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sbbl $0, %esi
-; X86-NEXT: jl .LBB6_2
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: js .LBB6_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: orl $17, %eax
; X86-NEXT: .LBB6_2:
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: or_i64_ri:
More information about the llvm-commits mailing list