[llvm] 8b47e29 - [X86] combineVectorShiftImm - fold (shl (add X, X), C) -> (shl X, (C + 1))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 14 09:42:16 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-14T17:42:02+01:00
New Revision: 8b47e29fa08e6234d67243e301ffe12f69a2d3af
URL: https://github.com/llvm/llvm-project/commit/8b47e29fa08e6234d67243e301ffe12f69a2d3af
DIFF: https://github.com/llvm/llvm-project/commit/8b47e29fa08e6234d67243e301ffe12f69a2d3af.diff
LOG: [X86] combineVectorShiftImm - fold (shl (add X, X), C) -> (shl X, (C + 1))
Noticed while investigating the regressions in D106675.
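For reference, the fold relies on the lane-wise identity (X + X) << C == X << (C + 1), which holds in wrap-around arithmetic because X + X is the same as X << 1 and shift-left amounts simply accumulate. A minimal stand-alone scalar sketch (a hypothetical check harness, not part of this patch) exercising the identity for 32-bit lanes:

#include <cassert>
#include <cstdint>

// (X + X) << C  ==  X << (C + 1) in modular arithmetic: X + X == X << 1,
// and successive shift-left amounts add up.
static uint32_t addThenShift(uint32_t X, unsigned C) { return (X + X) << C; }
static uint32_t shiftOneMore(uint32_t X, unsigned C) { return X << (C + 1); }

int main() {
  for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu})
    for (unsigned C = 0; C + 1 < 32; ++C) // keep C + 1 a valid shift amount
      assert(addThenShift(X, C) == shiftOneMore(X, C));
  return 0;
}

Each vector lane of the X86ISD::VSHLI node behaves the same way, so reusing the existing shift-merging path is safe.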
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pmul.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3ff51374c1141..88004c8281f3c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47202,10 +47202,8 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// result are all ones, not undef.
return DAG.getConstant(-1, SDLoc(N), VT);
- // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
- if (Opcode == N0.getOpcode()) {
- unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- unsigned NewShiftVal = ShiftVal + ShiftVal2;
+ auto MergeShifts = [&](SDValue X, uint64_t Amt0, uint64_t Amt1) {
+ unsigned NewShiftVal = Amt0 + Amt1;
if (NewShiftVal >= NumBitsPerElt) {
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
@@ -47215,7 +47213,16 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
}
return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
- }
+ };
+
+ // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
+ if (Opcode == N0.getOpcode())
+ return MergeShifts(N0.getOperand(0), ShiftVal, N0.getConstantOperandVal(1));
+
+ // (shl (add X, X), C) -> (shl X, (C + 1))
+ if (Opcode == X86ISD::VSHLI && N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(0) == N0.getOperand(1))
+ return MergeShifts(N0.getOperand(0), ShiftVal, 1);
// We can decode 'whole byte' logical bit shifts as shuffles.
if (LogicalShift && (ShiftVal % 8) == 0) {
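The refactored MergeShifts lambda adds the two immediates and resolves out-of-range results as the comments above describe. A hedged scalar model of that behaviour for a 32-bit element (hypothetical helper, not the patch's code):

#include <cstdint>

// Scalar model of merging two shift amounts: logical shifts that run out of
// range are guaranteed to be zero, while arithmetic right shifts clamp to
// NumBitsPerElt - 1, which splats the sign bit across the element.
int32_t mergeShiftsModel(int32_t X, unsigned Amt0, unsigned Amt1,
                         bool LogicalShift, bool ShiftLeft) {
  const unsigned NumBitsPerElt = 32;
  unsigned NewShiftVal = Amt0 + Amt1;
  if (NewShiftVal >= NumBitsPerElt) {
    if (LogicalShift)
      return 0;                       // e.g. (shl (add X, X), 31) -> all zeros
    NewShiftVal = NumBitsPerElt - 1;  // arithmetic shift keeps only the sign
  }
  if (ShiftLeft)
    return (int32_t)((uint32_t)X << NewShiftVal);
  return LogicalShift ? (int32_t)((uint32_t)X >> NewShiftVal)
                      : (X >> NewShiftVal);
}

The new (shl (add X, X), C) case feeds ShiftVal and a constant 1 into the same path, so the existing clamping covers the case where C + 1 reaches the element width.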
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index 2a85218718e23..8dd6ce2b3057b 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -1356,8 +1356,7 @@ define <2 x i64> @pmuldq_square(<2 x i64> %x) {
; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pmuludq %xmm1, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm0
-; SSE2-NEXT: psllq $32, %xmm0
+; SSE2-NEXT: psllq $33, %xmm0
; SSE2-NEXT: paddq %xmm1, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index ce3f635ab1346..04c9befa40fba 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -311,73 +311,69 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm9, %xmm9
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm9[2],xmm2[3],xmm9[3]
-; X64-NEXT: movq %xmm2, %rcx
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm9[2],xmm2[3],xmm9[3]
-; X64-NEXT: paddq %xmm2, %xmm2
-; X64-NEXT: psllq $31, %xmm2
-; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: pxor %xmm3, %xmm3
+; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: movdqa %xmm1, %xmm4
+; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm9[2],xmm4[3],xmm9[3]
+; X64-NEXT: movq %xmm4, %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm6
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X64-NEXT: movq %xmm2, %rax
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT: movq %xmm2, %rcx
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: movdqa %xmm1, %xmm3
+; X64-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: movq %xmm3, %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
-; X64-NEXT: movq %rax, %xmm2
-; X64-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm2[0]
+; X64-NEXT: movq %rax, %xmm3
+; X64-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0]
; X64-NEXT: movdqa {{.*#+}} xmm10 = [9223372039002259456,9223372039002259456]
-; X64-NEXT: movdqa %xmm6, %xmm2
-; X64-NEXT: pxor %xmm10, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
+; X64-NEXT: movdqa %xmm6, %xmm3
+; X64-NEXT: pxor %xmm10, %xmm3
+; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
; X64-NEXT: movdqa {{.*#+}} xmm8 = [2147483649,2147483649,2147483649,2147483649]
; X64-NEXT: pcmpeqd %xmm8, %xmm7
-; X64-NEXT: movdqa {{.*#+}} xmm4 = [9223372043297226751,9223372043297226751]
-; X64-NEXT: movdqa %xmm4, %xmm5
-; X64-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,0,2,2]
-; X64-NEXT: pand %xmm7, %xmm3
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
-; X64-NEXT: por %xmm3, %xmm2
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [9223372043297226751,9223372043297226751]
+; X64-NEXT: movdqa %xmm2, %xmm5
+; X64-NEXT: pcmpgtd %xmm3, %xmm5
+; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,0,2,2]
+; X64-NEXT: pand %xmm7, %xmm4
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; X64-NEXT: por %xmm4, %xmm3
; X64-NEXT: movdqa {{.*#+}} xmm7 = [8589934591,8589934591]
-; X64-NEXT: pand %xmm2, %xmm6
-; X64-NEXT: pandn %xmm7, %xmm2
-; X64-NEXT: por %xmm6, %xmm2
-; X64-NEXT: psrlq $1, %xmm2
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
-; X64-NEXT: paddq %xmm0, %xmm0
-; X64-NEXT: psllq $31, %xmm0
-; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: pand %xmm3, %xmm6
+; X64-NEXT: pandn %xmm7, %xmm3
+; X64-NEXT: por %xmm6, %xmm3
+; X64-NEXT: psrlq $1, %xmm3
+; X64-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm0[0],xmm9[1],xmm0[1]
+; X64-NEXT: movq %xmm9, %rax
; X64-NEXT: movd %xmm1, %ecx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
-; X64-NEXT: movq %rax, %xmm3
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NEXT: movq %rax, %xmm4
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,2,3]
; X64-NEXT: movq %xmm0, %rax
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: movq %xmm1, %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm0
-; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; X64-NEXT: pxor %xmm3, %xmm10
+; X64-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
+; X64-NEXT: pxor %xmm4, %xmm10
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
; X64-NEXT: pcmpeqd %xmm8, %xmm0
-; X64-NEXT: pcmpgtd %xmm10, %xmm4
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
+; X64-NEXT: pcmpgtd %xmm10, %xmm2
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,2,2]
; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: pand %xmm0, %xmm3
+; X64-NEXT: pand %xmm0, %xmm4
; X64-NEXT: pandn %xmm7, %xmm0
-; X64-NEXT: por %xmm3, %xmm0
+; X64-NEXT: por %xmm4, %xmm0
; X64-NEXT: psrlq $1, %xmm0
-; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; X64-NEXT: retq
;
; X86-LABEL: vec: