[llvm] r349459 - [X86][SSE] Replace (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31) fold.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 18 00:55:47 PST 2018
Author: rksimon
Date: Tue Dec 18 00:55:47 2018
New Revision: 349459
URL: http://llvm.org/viewvc/llvm-project?rev=349459&view=rev
Log:
[X86][SSE] Replace (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31) fold.
This fold was incredibly specific - replace with a SimplifyDemandedBits fold to remove a VSRAI if only the original sign bit is demanded (its guaranteed to stay the same).
Test change is merely a rescheduling.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/combine-srem.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=349459&r1=349458&r2=349459&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 18 00:55:47 2018
@@ -32447,6 +32447,10 @@ bool X86TargetLowering::SimplifyDemanded
if (ShiftImm->getAPIntValue().uge(BitWidth))
break;
+ // If we just want the sign bit then we don't need to shift it.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op0);
+
unsigned ShAmt = ShiftImm->getZExtValue();
APInt DemandedMask = OriginalDemandedBits << ShAmt;
@@ -32507,7 +32511,7 @@ bool X86TargetLowering::SimplifyDemanded
else if (KnownSrc.Zero[SrcBits - 1])
Known.Zero.setLowBits(NumElts);
return false;
- }
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -35562,13 +35566,6 @@ static SDValue combineVectorShiftImm(SDN
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
- // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
- // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
- // TODO - support other sra opcodes as needed.
- if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
- N0.getOpcode() == X86ISD::VSRAI)
- return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
-
// fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
N1 == N0.getOperand(1)) {
Modified: llvm/trunk/test/CodeGen/X86/combine-srem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-srem.ll?rev=349459&r1=349458&r2=349459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-srem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-srem.ll Tue Dec 18 00:55:47 2018
@@ -252,35 +252,35 @@ define <4 x i32> @combine_vec_srem_by_po
; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrld $29, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; SSE-NEXT: psrld $30, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE-NEXT: paddd %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: psrad $3, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm3
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrld $29, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrld $31, %xmm3
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: psrld $30, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE-NEXT: paddd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrad $3, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrad $1, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: psrad $2, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; SSE-NEXT: pmulld {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubd %xmm2, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: psrad $2, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
-; AVX1-NEXT: vpsrld $29, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; AVX1-NEXT: vpsrld $30, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm2
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
@@ -317,10 +317,10 @@ define <4 x i32> @combine_vec_srem_by_po
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrld $30, %xmm3
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrld $31, %xmm2
-; SSE-NEXT: psrld $29, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrld $29, %xmm2
+; SSE-NEXT: psrld $31, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -344,9 +344,9 @@ define <4 x i32> @combine_vec_srem_by_po
; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT: vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm3
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm3
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2
More information about the llvm-commits
mailing list