[llvm] 63b1e58 - [DAG] SimplifyDemandedBits - simplify rotl/rotr to shl/srl (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 25 03:14:21 PST 2021
Author: Simon Pilgrim
Date: 2021-11-25T11:14:15Z
New Revision: 63b1e58f0738cc9977b47f947679ef5544808b73
URL: https://github.com/llvm/llvm-project/commit/63b1e58f0738cc9977b47f947679ef5544808b73
DIFF: https://github.com/llvm/llvm-project/commit/63b1e58f0738cc9977b47f947679ef5544808b73.diff
LOG: [DAG] SimplifyDemandedBits - simplify rotl/rotr to shl/srl (REAPPLIED)
If we only demand bits from one half of a rotation pattern, see if we can simplify to a logical shift.
For the ARM/AArch64 rev16/32 patterns, I had to drop a fold to prevent srl(bswap()) -> rotr(bswap()) -> srl(bswap()) infinite loops. I've replaced it with an isel PatFrag that should do the same job.
Reapplied with a fix for the AArch64 rev patterns to match the ARM fix.
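As a standalone illustration (plain C++, not LLVM code; bswap32, rotr32 and rev16_32 are hypothetical helpers modelling the DAG nodes involved), the sketch below checks the identity behind the dropped fold and the new isel patterns: when the upper 16 bits of x are zero, srl(bswap(x), 16), rotr(bswap(x), 16) and rev16(x) all agree.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) | (X << 24);
}
static uint32_t rotr32(uint32_t X, unsigned Amt) {
  Amt &= 31;
  return (X >> Amt) | (X << ((32 - Amt) & 31));
}
// REV16 semantics: byte-reverse each 16-bit halfword.
static uint32_t rev16_32(uint32_t X) {
  return ((X & 0x00FF00FFu) << 8) | ((X & 0xFF00FF00u) >> 8);
}

int main() {
  // Values satisfying the top16Zero precondition (upper 16 bits are zero).
  for (uint32_t X : {0x0000ABCDu, 0x00001234u, 0x0000FFFFu, 0x00000000u}) {
    assert((bswap32(X) >> 16) == rotr32(bswap32(X), 16));
    assert((bswap32(X) >> 16) == rev16_32(X));
  }
  return 0;
}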
https://alive2.llvm.org/ce/z/iroxki (rol -> shl by amt iff the demanded bits have at least as many trailing zeros as the shift amount)
https://alive2.llvm.org/ce/z/4ez_U- (ror -> shl by revamt iff the demanded bits have at least as many trailing zeros as the reverse shift amount)
https://alive2.llvm.org/ce/z/cD7dR- (ror -> lshr by amt iff the demanded bits have at least as many leading zeros as the shift amount)
https://alive2.llvm.org/ce/z/_XGHtQ (rol -> lshr by revamt iff the demanded bits have at least as many leading zeros as the reverse shift amount)
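As a quick sanity check of those conditions (again plain C++, not part of the patch): if the demanded mask has at least Amt trailing zeros, the low bits contributed by the right-shift half of a rotate-left are never observed, so rotl(x, Amt) and (x << Amt) agree on every demanded bit.

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned Amt) {
  Amt &= 31;
  return (X << Amt) | (X >> ((32 - Amt) & 31));
}

int main() {
  const unsigned Amt = 8;
  // Demanded mask with 8 trailing zeros -- at least as many as Amt.
  const uint32_t Demanded = 0xFFFFFF00u;
  for (uint32_t X : {0x12345678u, 0xDEADBEEFu, 0x00000000u, 0xFFFFFFFFu}) {
    // The low Amt bits of the rotate come only from the srl half, which the
    // mask discards, so only the shl half survives.
    assert((rotl32(X, Amt) & Demanded) == ((X << Amt) & Demanded));
  }
  return 0;
}

The lshr cases are symmetric: with enough leading zeros in the demanded mask, only the right-shift half of the rotate survives.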
Differential Revision: https://reviews.llvm.org/D114354
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
llvm/test/CodeGen/X86/rotate_vec.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index aabec1ab1df95..234370bf0e057 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ bool IsROTL = (Op.getOpcode() == ISD::ROTL);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+ unsigned RevAmt = BitWidth - Amt;
+
+ // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+ // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+ APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // rot*(x, 0) --> x
+ if (Amt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // See if we don't demand either half of the rotated bits.
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+ DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+ }
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+ DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ }
+ }
+
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eb143b3e1c2fd..d5e2e8b08457d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -890,7 +890,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -13876,34 +13875,6 @@ static SDValue performANDCombine(SDNode *N,
return SDValue();
}
-static SDValue performSRLCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
- // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
- // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::BSWAP) {
- SDLoc DL(N);
- SDValue N1 = N->getOperand(1);
- SDValue N00 = N0.getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- uint64_t ShiftAmt = C->getZExtValue();
- if (VT == MVT::i32 && ShiftAmt == 16 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- if (VT == MVT::i64 && ShiftAmt == 32 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- }
- }
- return SDValue();
-}
-
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
@@ -17271,8 +17242,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
- case ISD::SRL:
- return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index db8e0c5dac4a4..decee117d2d5f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -437,6 +437,18 @@ def non_temporal_store :
cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR32:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i32 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
+def top32Zero: PatLeaf<(i64 GPR64:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i64 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
+ }]>;
+
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
@@ -2046,6 +2058,10 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
+// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
+def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
+def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
+
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 23c1a6e8cf212..5f5e76b9558a5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -17084,18 +17084,6 @@ static SDValue PerformShiftCombine(SDNode *N,
const ARMSubtarget *ST) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
- // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
- // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
- SDValue N1 = N->getOperand(1);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- SDValue N0 = N->getOperand(0);
- if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
- DAG.MaskedValueIsZero(N0.getOperand(0),
- APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
- }
- }
if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
N->getOperand(0)->getOpcode() == ISD::AND &&
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 7d0bc756e8825..1c1db473f866a 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -420,6 +420,12 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR:$src), [{
+ return !SDValue(N,0)->getValueType(0).isVector() &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -4748,6 +4754,8 @@ def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
(REV16 (LDRH addrmode3:$addr))>;
def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
(STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+def : ARMV6Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (REV16 GPR:$Rn)>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index bf717a4056e90..f09ad81676001 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1576,6 +1576,8 @@ def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
(tREV16 (tLDRHr t_addrmode_rr:$addr))>;
+def : T1Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (tREV16 tGPR:$Rn)>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
(tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 783db9dde17fe..e0618a2966ea7 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2926,18 +2926,11 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
-// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
-def top16Zero: PatLeaf<(i32 rGPR:$src), [{
- return !SDValue(N,0)->getValueType(0).isVector() &&
- CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
- }]>;
-
// so_imm_notSext is needed instead of so_imm_not, as the value of imm
// will match the extended, not the original bitWidth for $src.
def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
(t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
-
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -3283,6 +3276,9 @@ def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
[(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
Sched<[WriteALU]>;
+def : T2Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (t2REV16 rGPR:$Rn)>;
+
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
[(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
index 7e4e8515f0b3b..c8a03098ebccb 100644
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -111,21 +111,21 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) {
; XOPAVX1-LABEL: rot_v4i32_mask_ashr0:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: rot_v4i32_mask_ashr0:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: rot_v4i32_mask_ashr0:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vprold $1, %xmm0, %xmm0
+; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 28>
@@ -138,24 +138,24 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) {
define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
; XOPAVX1-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0
-; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0
-; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: rot_v4i32_mask_ashr1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrad $25, %xmm0, %xmm0
-; AVX512-NEXT: vprold $1, %xmm0, %xmm0
+; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index f68ec3e45c807..27ef11dd815ff 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -2075,51 +2075,15 @@ define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vzeroupper
-; AVX512VBMI2-NEXT: retq
-;
-; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: retq
+; AVX512-LABEL: splatconstant_rotate_mask_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $49, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_mask_v2i64:
; XOP: # %bb.0:
-; XOP-NEXT: vprotq $15, %xmm0, %xmm0
+; XOP-NEXT: vpsrlq $49, %xmm0, %xmm0
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 2e69ffc5a9b09..bc82f32007ddd 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
@@ -1799,60 +1799,24 @@ define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprolq $15, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprolq $15, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: retq
-;
-; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v4i64:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprolq $15, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: retq
+; AVX512-LABEL: splatconstant_rotate_mask_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $49, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm1
+; XOPAVX1-NEXT: vpsrlq $49, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm1
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%shl = shl <4 x i64> %a, <i64 15, i64 15, i64 15, i64 15>
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 02d81bb9f0ade..38da7d88cd8ae 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -1043,7 +1043,7 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
; AVX512-LABEL: splatconstant_rotate_mask_v8i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vprolq $15, %zmm0, %zmm0
+; AVX512-NEXT: vpsrlq $49, %zmm0, %zmm0
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
%shl = shl <8 x i64> %a, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>