[llvm] a2963d8 - [SDAG] fold sub-of-shift to add-of-shift
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 18 08:56:00 PST 2022
Author: Sanjay Patel
Date: 2022-02-18T11:55:50-05:00
New Revision: a2963d871ee5c2786de409b42f67ffcc39e53184
URL: https://github.com/llvm/llvm-project/commit/a2963d871ee5c2786de409b42f67ffcc39e53184
DIFF: https://github.com/llvm/llvm-project/commit/a2963d871ee5c2786de409b42f67ffcc39e53184.diff
LOG: [SDAG] fold sub-of-shift to add-of-shift
This fold is already done in IR; Alive2 proof:
https://alive2.llvm.org/ce/z/jWyFrP
There is an x86 test that shows an improvement
from the added flexibility of using add (commutative).
The other diffs are presumed neutral.
Note that this could also be folded to an 'xor',
but it is not clear that would be universally better
(e.g., x86 can convert adds more easily into LEA).
This helps prevent regressions from a potential fold for
issue #53829.
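
For illustration (not part of the commit): the fold relies on the fact
that (X << BW-1) is always either 0 or SMIN, and in two's complement
-SMIN == SMIN, so subtracting that value is the same as adding it. A
minimal standalone C++ check, exhaustive for 8-bit values using
unsigned (wrapping) arithmetic:

// Sketch only: verifies N0 - (X << BW-1) == N0 + (X << BW-1) for BW=8.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned N = 0; N < 256; ++N) {
    for (unsigned X = 0; X < 256; ++X) {
      uint8_t Shl = static_cast<uint8_t>(X << 7); // always 0 or 0x80 (SMIN)
      uint8_t Sub = static_cast<uint8_t>(N - Shl);
      uint8_t Add = static_cast<uint8_t>(N + Shl);
      assert(Sub == Add && "sub-of-shift must equal add-of-shift");
    }
  }
  return 0;
}

Since add is commutative and sub is not, the canonicalized form gives
the DAG combiner and instruction selection more matching freedom, which
is what produces the improved x86 sequence in imul.ll below.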
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
llvm/test/CodeGen/X86/combine-srem.ll
llvm/test/CodeGen/X86/imul.ll
llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a0708336d26a2..89c3e41392882 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3656,6 +3656,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // As with the previous fold, prefer add for more folding potential.
+ // Subtracting SMIN/0 is the same as adding SMIN/0:
+ // N0 - (X << BW-1) --> N0 + (X << BW-1)
+ if (N1.getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
+ if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ }
+
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
index fc033bc741c19..c6f7377794413 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -208,7 +208,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-NEXT: movi v3.4s, #128, lsl #24
; CHECK-NEXT: usra v1.4s, v2.4s, #1
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index be2b7b86c8aed..575f37117f2da 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -74,7 +74,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_srem_by_minsigned:
@@ -83,7 +83,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
@@ -93,7 +93,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
%1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
ret <4 x i32> %1
diff --git a/llvm/test/CodeGen/X86/imul.ll b/llvm/test/CodeGen/X86/imul.ll
index 4a4b159c68ef7..9131688c4efcc 100644
--- a/llvm/test/CodeGen/X86/imul.ll
+++ b/llvm/test/CodeGen/X86/imul.ll
@@ -529,9 +529,8 @@ define i64 @testNegOverflow(i64 %a) {
; X64-LABEL: testNegOverflow:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shlq $63, %rcx
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: shlq $63, %rax
+; X64-NEXT: addq %rdi, %rax
; X64-NEXT: retq
;
; X86-LABEL: testNegOverflow:
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
index 4aa45ecde8736..95eb23fc3cd5d 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -622,7 +622,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-SSE-NEXT: psrld $1, %xmm1
; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE-NEXT: paddd %xmm1, %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT: psrld $31, %xmm0
@@ -634,7 +634,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -647,7 +647,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -659,7 +659,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0