[llvm] r350399 - [X86] Add VPSLLI/VPSRLI ((X >>u C1) << C2) SimplifyDemandedBits combine
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 4 07:43:43 PST 2019
Author: rksimon
Date: Fri Jan 4 07:43:43 2019
New Revision: 350399
URL: http://llvm.org/viewvc/llvm-project?rev=350399&view=rev
Log:
[X86] Add VPSLLI/VPSRLI ((X >>u C1) << C2) SimplifyDemandedBits combine
Repeat of the generic SimplifyDemandedBits shift combine, applied to the X86ISD::VSHLI/VSRLI immediate-shift nodes.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-trunc-widen.ll
llvm/trunk/test/CodeGen/X86/vector-trunc.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350399&r1=350398&r2=350399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 4 07:43:43 2019
@@ -32403,15 +32403,38 @@ bool X86TargetLowering::SimplifyDemanded
break;
}
case X86ISD::VSHLI: {
- if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
if (ShiftImm->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = ShiftImm->getZExtValue();
APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
- OriginalDemandedElts, Known, TLO, Depth + 1))
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (Op0.getOpcode() == X86ISD::VSRLI &&
+ OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
+ if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
+ int Diff = ShAmt - Shift2Imm->getZExtValue();
+ if (Diff == 0)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+
+ unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
+ SDValue NewShift = TLO.DAG.getNode(
+ NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
+ TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ return TLO.CombineTo(Op, NewShift);
+ }
+ }
+ }
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-widen.ll?rev=350399&r1=350398&r2=350399&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-widen.ll Fri Jan 4 07:43:43 2019
@@ -468,11 +468,7 @@ entry:
define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_lshr:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -767,18 +763,10 @@ entry:
define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_lshr:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrld $16, %xmm2
-; SSE2-NEXT: psrld $16, %xmm3
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
@@ -787,18 +775,10 @@ define void @trunc16i32_16i16_lshr(<16 x
;
; SSSE3-LABEL: trunc16i32_16i16_lshr:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: psrld $16, %xmm2
-; SSSE3-NEXT: psrld $16, %xmm3
-; SSSE3-NEXT: psrld $16, %xmm0
-; SSSE3-NEXT: psrld $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm1
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm0
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: pslld $16, %xmm3
; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: pslld $16, %xmm2
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: movdqu %xmm2, (%rax)
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=350399&r1=350398&r2=350399&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Fri Jan 4 07:43:43 2019
@@ -478,11 +478,7 @@ entry:
define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_lshr:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -777,18 +773,10 @@ entry:
define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_lshr:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrld $16, %xmm2
-; SSE2-NEXT: psrld $16, %xmm3
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
@@ -797,18 +785,10 @@ define void @trunc16i32_16i16_lshr(<16 x
;
; SSSE3-LABEL: trunc16i32_16i16_lshr:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: psrld $16, %xmm2
-; SSSE3-NEXT: psrld $16, %xmm3
-; SSSE3-NEXT: psrld $16, %xmm0
-; SSSE3-NEXT: psrld $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm1
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm0
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: pslld $16, %xmm3
; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: pslld $16, %xmm2
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: movdqu %xmm2, (%rax)
More information about the llvm-commits mailing list.