[llvm] r317299 - [X86] Remove PALIGNR/VALIGN handling from combineBitcastForMaskedOp and move to isel patterns instead. Prefer 128-bit VALIGND/VALIGNQ over PALIGNR during lowering when possible.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 2 23:48:02 PDT 2017
Author: ctopper
Date: Thu Nov 2 23:48:02 2017
New Revision: 317299
URL: http://llvm.org/viewvc/llvm-project?rev=317299&view=rev
Log:
[X86] Remove PALIGNR/VALIGN handling from combineBitcastForMaskedOp and move to isel patterns instead. Prefer 128-bit VALIGND/VALIGNQ over PALIGNR during lowering when possible.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=317299&r1=317298&r2=317299&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 2 23:48:02 2017
@@ -10716,10 +10716,16 @@ static SDValue lowerV2I64VectorShuffle(c
// Try to use byte rotation instructions.
// Its more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget.hasSSSE3())
+ if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ }
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
@@ -11016,10 +11022,16 @@ static SDValue lowerV4I32VectorShuffle(c
// Try to use byte rotation instructions.
// Its more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget.hasSSSE3())
+ if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ }
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
@@ -30674,26 +30686,6 @@ static bool combineBitcastForMaskedOp(SD
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case X86ISD::PALIGNR:
- // PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
- if (!VT.is128BitVector())
- return false;
- Opcode = X86ISD::VALIGN;
- LLVM_FALLTHROUGH;
- case X86ISD::VALIGN: {
- if (EltVT != MVT::i32 && EltVT != MVT::i64)
- return false;
- uint64_t Imm = Op.getConstantOperandVal(2);
- MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
- unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
- unsigned EltSize = EltVT.getSizeInBits();
- // Make sure we can represent the same shift with the new VT.
- if ((ShiftAmt % EltSize) != 0)
- return false;
- Imm = ShiftAmt / EltSize;
- return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
- DAG.getConstant(Imm, DL, MVT::i8));
- }
case X86ISD::SHUF128: {
if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
return false;
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=317299&r1=317298&r2=317299&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Nov 2 23:48:02 2017
@@ -8911,6 +8911,123 @@ defm VPALIGNR: avx512_common_3Op_rm_im
avx512vl_i8_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>;
+// Fragments to help convert valignq into masked valignd. Or valignq/valignd
+// into vpalignr.
+def ValignqImm32XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
+}]>;
+def ValignqImm8XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
+}]>;
+def ValigndImm8XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
+}]>;
+
+multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ SDNodeXForm ImmXForm> {
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, To.RC:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
+ To.RC:$src1, To.RC:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.LdFrag addr:$src2)),
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.LdFrag addr:$src2)),
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+}
+
+multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo From,
+ X86VectorVTInfo To,
+ SDNodeXForm ImmXForm> :
+ avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
+ def : Pat<(From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3)),
+ (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+}
+
+let Predicates = [HasAVX512] in {
+ // For 512-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
+ v16i32_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX] in {
+ // For 128-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
+ v4i32x_info, ValignqImm32XForm>;
+ // For 256-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
+ v8i32x_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX, HasBWI] in {
+ // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
+ defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
+ v16i8x_info, ValignqImm8XForm>;
+ defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
+ v16i8x_info, ValigndImm8XForm>;
+}
+
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=317299&r1=317298&r2=317299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Thu Nov 2 23:48:02 2017
@@ -4712,8 +4712,8 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: valignd $6, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x03,0xd9,0x06]
-; CHECK-NEXT: ## ymm3 = ymm1[6,7],ymm0[0,1,2,3,4,5]
+; CHECK-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
+; CHECK-NEXT: ## ymm3 = ymm1[3],ymm0[0,1,2]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
; CHECK-NEXT: ## ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
More information about the llvm-commits
mailing list