[llvm] r280696 - [AVX-512] Fix masked VPERMI2PS isel when the index comes from a bitcast.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 5 23:56:59 PDT 2016
Author: ctopper
Date: Tue Sep 6 01:56:59 2016
New Revision: 280696
URL: http://llvm.org/viewvc/llvm-project?rev=280696&view=rev
Log:
[AVX-512] Fix masked VPERMI2PS isel when the index comes from a bitcast.
We need to bitcast the index operand to a floating point type so that it matches the result type. If not then the passthru part of the DAG will be a bitcast from the index's original type to the destination type. This makes it very difficult to match. The other option would be to add 5 sets of patterns for every other possible type.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=280696&r1=280695&r2=280696&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Sep 6 01:56:59 2016
@@ -17869,19 +17869,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:{
+ MVT VT = Op.getSimpleValueType();
// Src2 is the PassThru
SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
+ // PassThru needs to be the same type as the destination in order
+ // to pattern match correctly.
+ SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// set PassThru element
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
- PassThru = DAG.getBitcast(VT, Src2);
+ PassThru = Src2;
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=280696&r1=280695&r2=280696&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Sep 6 01:56:59 2016
@@ -299,22 +299,6 @@ multiclass AVX512_maskable_3src<bits<8>
(vselect _.KRCWM:$mask, RHS, _.RC:$src1),
vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
-// Similar to AVX512_maskable_3src but in this case the input VT for the tied
-// operand differs from the output VT. This requires a bitconvert on
-// the preserved vector going into the vselect.
-multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
- X86VectorVTInfo InVT,
- dag Outs, dag NonTiedIns, string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
- AVX512_maskable_common<O, F, OutVT, Outs,
- !con((ins InVT.RC:$src1), NonTiedIns),
- !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect InVT.KRCWM:$mask, RHS,
- (bitconvert InVT.RC:$src1))>;
-
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@@ -1181,83 +1165,76 @@ defm VPBROADCASTMB2Q : avx512_mask_broad
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
-multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in {
- defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
+ // The index operand in the pattern should really be an integer type. However,
+ // if we do that and it happens to come from a bitcast, then it becomes
+ // difficult to find the bitcast needed to convert the index to the
+ // destination type for the passthru since it will be folded with the bitcast
+ // of the index operand.
+ defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
AVX5128IBase;
- defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+ (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
(_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in
- defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (X86VPermi2X IdxVT.RC:$src1,
+ (_.VT (X86VPermi2X _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo VTInfo,
- AVX512VLVectorVTInfo ShuffleMask> {
- defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
- ShuffleMask.info512>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
- ShuffleMask.info512>, EVEX_V512;
+ AVX512VLVectorVTInfo VTInfo> {
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
- ShuffleMask.info128>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
- ShuffleMask.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
- ShuffleMask.info256>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
- ShuffleMask.info256>, EVEX_V256;
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
- AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
- defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
- Idx.info512>, EVEX_V512;
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
- Idx.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
- Idx.info256>, EVEX_V256;
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
- avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
- avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
- avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
+ avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
- avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
+ avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
- avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
- avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=280696&r1=280695&r2=280696&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Sep 6 01:56:59 2016
@@ -414,10 +414,12 @@ def X86VPermt2 : SDNode<"X86ISD::VPE
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
+// Even though the index operand should be integer, we need to make it match the
+// destination type so that we can pattern match the masked version where the
+// index is also the passthru operand.
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
- SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameSizeAs<0,1>,
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=280696&r1=280695&r2=280696&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Tue Sep 6 01:56:59 2016
@@ -2030,9 +2030,8 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9]
-; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda]
-; CHECK-NEXT: vblendmps %xmm3, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc3]
+; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
+; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%x1cast = bitcast <2 x i64> %x1 to <4 x i32>
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
More information about the llvm-commits
mailing list