[llvm] r355071 - [X86] Use PreprocessISelDAG to convert vector sra/srl/shl to the X86 specific variable shift ISD opcodes.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 27 23:21:26 PST 2019
Author: ctopper
Date: Wed Feb 27 23:21:26 2019
New Revision: 355071
URL: http://llvm.org/viewvc/llvm-project?rev=355071&view=rev
Log:
[X86] Use PreprocessISelDAG to convert vector sra/srl/shl to the X86 specific variable shift ISD opcodes.
This allows us to use the same set of isel patterns for sra/srl/shl, which are undefined for out-of-range shifts, and for the intrinsic shifts, which aren't undefined.
Doing this late allows DAG combine to have every opportunity to optimize the sra/srl/shl nodes.
This removes about 7000 bytes from the isel table and simplifies the td files.
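
The reason one pattern set can now cover both is that the hardware variable shifts have well-defined out-of-range behavior, which the target-independent sra/srl/shl leave undefined. As a purely illustrative aside (not part of the patch), a minimal C++ model of the per-element dword semantics documented for vpsllvd/vpsrlvd/vpsravd looks like this; the helper names are made up for the example:

#include <array>
#include <cstdint>
#include <cstdio>

// Logical shifts: counts >= 32 zero the element.
static uint32_t vshlv_elt(uint32_t X, uint32_t Amt) { return Amt < 32 ? X << Amt : 0; }
static uint32_t vsrlv_elt(uint32_t X, uint32_t Amt) { return Amt < 32 ? X >> Amt : 0; }
// Arithmetic shift: counts >= 32 fill the element with the sign bit.
// (Assumes the host compiler treats >> on a negative int as an arithmetic
// shift, which mainstream compilers do.)
static int32_t vsrav_elt(int32_t X, uint32_t Amt) { return X >> (Amt < 32 ? Amt : 31); }

int main() {
  std::array<uint32_t, 4> Val = {1, 2, 0x80000000u, 4};
  std::array<uint32_t, 4> Amt = {0, 1, 33, 40}; // last two lanes are out of range
  for (size_t I = 0; I != Val.size(); ++I)
    std::printf("lane %zu: shl=%u srl=%u sra=%d\n", I,
                vshlv_elt(Val[I], Amt[I]), vsrlv_elt(Val[I], Amt[I]),
                vsrav_elt((int32_t)Val[I], Amt[I]));
  return 0;
}

Because out-of-range counts are undefined for the target-independent nodes, selecting them to instructions with this behavior is always legal, and the intrinsics get exactly the behavior they require.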
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=355071&r1=355070&r2=355071&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Feb 27 23:21:26 2019
@@ -753,6 +753,30 @@ void X86DAGToDAGISel::PreprocessISelDAG(
continue;
}
+ // Replace vector shifts with their X86 specific equivalent so we don't
+ // need 2 sets of patterns.
+ switch (N->getOpcode()) {
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ if (N->getValueType(0).isVector()) {
+ unsigned NewOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
+ case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
+ case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
+ }
+ SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ }
+
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
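
(An aside on the --I/++I bracketing around ReplaceAllUsesOfValueWith above: the preprocessing loop holds an iterator to the node after N, and the replacement can CSE away that very node, so the iterator is first backed up onto N, which is known to survive until the explicit DeleteNode call. A rough, stand-alone sketch of the same iterator-protection idiom using std::list, with no LLVM types involved:)

#include <cstdio>
#include <list>

int main() {
  std::list<int> Nodes = {1, 2, 3, 4};
  for (auto I = Nodes.begin(), E = Nodes.end(); I != E;) {
    auto N = I++;          // I now points at the node after *N.
    if (*N == 2) {
      --I;                 // Step back onto N, which we know survives.
      Nodes.remove(3);     // Mutation that may erase the node I pointed at.
      ++I;                 // Advance past N again; still valid.
      Nodes.erase(N);      // Delete N ourselves; does not touch I.
    }
  }
  for (int V : Nodes)
    std::printf("%d ", V); // prints: 1 4
  std::printf("\n");
  return 0;
}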
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=355071&r1=355070&r2=355071&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Feb 27 23:21:26 2019
@@ -6427,118 +6427,22 @@ multiclass avx512_var_shift_w<bits<8> op
}
}
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
-defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
-
-// Special handing for handling VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
- X86VectorVTInfo _, list<Predicate> p> {
- let Predicates = p in {
- def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
- _.RC:$src2)>;
- def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
- _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
- _.RC:$src1, addr:$src2)>;
- }
-}
-
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
- X86VectorVTInfo _,
- list<Predicate> p> :
- avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
- let Predicates = p in {
- def : Pat<(_.VT (OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
- _.RC:$src1, addr:$src2)>;
- }
-}
-
-multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo,
- Predicate p> {
- defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>;
- defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256,
- [HasVLX, p]>;
- defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128,
- [HasVLX, p]>;
-}
-
-multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo,
- Predicate p> {
- defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>;
- defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256,
- [HasVLX, p]>;
- defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128,
- [HasVLX, p]>;
-}
-
-defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info,
- HasBWI>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav,
- avx512vl_i32_info, HasAVX512>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav,
- avx512vl_i64_info, HasAVX512>;
-
-defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info,
- HasBWI>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv,
- avx512vl_i32_info, HasAVX512>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv,
- avx512vl_i64_info, HasAVX512>;
-
-defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info,
- HasBWI>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv,
- avx512vl_i32_info, HasAVX512>;
-defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv,
- avx512vl_i64_info, HasAVX512>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=355071&r1=355070&r2=355071&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Feb 27 23:21:26 2019
@@ -8371,7 +8371,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode IntrinNode, ValueType vt128, ValueType vt256> {
+ ValueType vt128, ValueType vt256> {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8400,23 +8400,14 @@ multiclass avx2_var_shift<bits<8> opc, s
(vt256 (load addr:$src2)))))]>,
VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
SchedWriteVarVecShift.YMM.ReadAfterFold]>;
-
- def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)),
- (!cast<Instruction>(NAME#"rr") VR128:$src1, VR128:$src2)>;
- def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))),
- (!cast<Instruction>(NAME#"rm") VR128:$src1, addr:$src2)>;
- def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)),
- (!cast<Instruction>(NAME#"Yrr") VR256:$src1, VR256:$src2)>;
- def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))),
- (!cast<Instruction>(NAME#"Yrm") VR256:$src1, addr:$src2)>;
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>;
- defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W;
- defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>;
- defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W;
- defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>;
+ defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
+ defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
+ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}
//===----------------------------------------------------------------------===//