[llvm] r319312 - [X86][AVX512] Set up unary (PABS/VPLZCNT/VPOPCNT/VPCONFLICT/VMOV*DUP) instruction scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 05:49:51 PST 2017


Author: rksimon
Date: Wed Nov 29 05:49:51 2017
New Revision: 319312

URL: http://llvm.org/viewvc/llvm-project?rev=319312&view=rev
Log:
[X86][AVX512] Set up unary (PABS/VPLZCNT/VPOPCNT/VPCONFLICT/VMOV*DUP) instruction scheduler classes


Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319312&r1=319311&r2=319312&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Nov 29 05:49:51 2017
@@ -9098,84 +9098,93 @@ defm VDBPSADBW: avx512_common_3Op_rm_imm
                     avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
 
 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           X86VectorVTInfo _> {
+                           OpndItins itins, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1), OpcodeStr,
                     "$src1", "$src1",
-                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
+                    (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
+                    Sched<[itins.Sched]>;
 
   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1), OpcodeStr,
                   "$src1", "$src1",
-                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
-            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
+            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
+            Sched<[itins.Sched.Folded]>;
   }
 }
 
 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            X86VectorVTInfo _> :
-           avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
+                            OpndItins itins, X86VectorVTInfo _> :
+           avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1), OpcodeStr,
                   "${src1}"##_.BroadcastStr,
                   "${src1}"##_.BroadcastStr,
                   (_.VT (OpNode (X86VBroadcast
-                                    (_.ScalarLdFrag addr:$src1))))>,
-             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+                                    (_.ScalarLdFrag addr:$src1)))), itins.rm>,
+             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+             Sched<[itins.Sched.Folded]>;
 }
 
 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+                              Predicate prd> {
   let Predicates = [prd] in
-    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
+                             EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                               EVEX_V256;
-    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
+    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                               EVEX_V128;
   }
 }
 
 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+                               Predicate prd> {
   let Predicates = [prd] in
-    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
                               EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                                  EVEX_V256;
-    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                                  EVEX_V128;
   }
 }
 
 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
-                                 SDNode OpNode, Predicate prd> {
-  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
-                               prd>, VEX_W;
-  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
-                               prd>;
+                                 SDNode OpNode, OpndItins itins, Predicate prd> {
+  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
+                               avx512vl_i64_info, prd>, VEX_W;
+  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
+                               avx512vl_i32_info, prd>;
 }
 
 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
-                                 SDNode OpNode, Predicate prd> {
-  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>, VEX_WIG;
-  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>, VEX_WIG;
+                                 SDNode OpNode, OpndItins itins, Predicate prd> {
+  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
+                              avx512vl_i16_info, prd>, VEX_WIG;
+  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
+                              avx512vl_i8_info, prd>, VEX_WIG;
 }
 
 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                   bits<8> opc_d, bits<8> opc_q,
-                                  string OpcodeStr, SDNode OpNode> {
-  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+                                  string OpcodeStr, SDNode OpNode,
+                                  OpndItins itins> {
+  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                     HasAVX512>,
-              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                     HasBWI>;
 }
 
-defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
 
 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
 let Predicates = [HasAVX512, NoVLX] in {
@@ -9191,13 +9200,17 @@ let Predicates = [HasAVX512, NoVLX] in {
              sub_xmm)>;
 }
 
-multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
-
-  defm NAME :          avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
+multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, OpndItins itins,
+                       Predicate prd> {
+  defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, itins, prd>;
 }
 
-defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
-defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
+// FIXME: Is there a better scheduler itinerary for VPLZCNT?
+defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", SSE_INTALU_ITINS_P, HasCDI>;
+
+// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
+defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
+                                        SSE_INTALU_ITINS_P, HasCDI>;
 
 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
 let Predicates = [HasCDI, NoVLX] in {
@@ -9228,9 +9241,10 @@ let Predicates = [HasCDI, NoVLX] in {
 // Counts number of ones - VPOPCNTD and VPOPCNTQ
 //===---------------------------------------------------------------------===//
 
-multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
+multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr,
+                                   OpndItins itins, X86VectorVTInfo VTInfo> {
   let Predicates = [HasVPOPCNTDQ] in
-    defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
+    defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, itins, VTInfo>, EVEX_V512;
 }
 
 // Use 512bit version to implement 128/256 bit.
@@ -9254,59 +9268,67 @@ multiclass avx512_unary_lowering<SDNode
   }
 }
 
-defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
+// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
+defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", SSE_INTALU_ITINS_P,
+                                        v16i32_info>,
                 avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
-defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
+
+defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", SSE_INTALU_ITINS_P,
+                                        v8i64_info>,
                 avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
 
 //===---------------------------------------------------------------------===//
 // Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
-multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
-  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
-                                      HasAVX512>, XS;
+multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            OpndItins itins> {
+  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
+                                      avx512vl_f32_info, HasAVX512>, XS;
 }
 
-defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
-defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
+defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
+defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
 
 //===----------------------------------------------------------------------===//
 // AVX-512 - MOVDDUP
 //===----------------------------------------------------------------------===//
 
 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                              X86VectorVTInfo _> {
+                              OpndItins itins, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
-                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
+                   (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
+                   Sched<[itins.Sched]>;
   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                  (_.VT (OpNode (_.VT (scalar_to_vector
-                                       (_.ScalarLdFrag addr:$src)))))>,
-                 EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+                                       (_.ScalarLdFrag addr:$src))))),
+                 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
+                 Sched<[itins.Sched.Folded]>;
   }
 }
 
 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                                  AVX512VLVectorVTInfo VTInfo> {
+                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
 
-  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, VTInfo.info512>, EVEX_V512;
+  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info512>, EVEX_V512;
 
   let Predicates = [HasAVX512, HasVLX] in {
-    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, VTInfo.info256>,
+    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info256>,
                                EVEX_V256;
-    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, VTInfo.info128>,
+    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
                                   EVEX_V128;
   }
 }
 
-multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
-  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode,
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                          OpndItins itins> {
+  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
                                         avx512vl_f64_info>, XD, VEX_W;
 }
 
-defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
 
 let Predicates = [HasVLX] in {
 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
@@ -10204,10 +10226,11 @@ defm VPDPWSSDS  : VNNI_common<0x53, "vpd
 // Bit Algorithms
 //===----------------------------------------------------------------------===//
 
-defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
+// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
+defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
                                    avx512vl_i8_info, HasBITALG>,
                 avx512_unary_lowering<ctpop, avx512vl_i8_info, HasBITALG>;
-defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
+defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
                                    avx512vl_i16_info, HasBITALG>,
                 avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
 

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319312&r1=319311&r2=319312&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Nov 29 05:49:51 2017
@@ -4695,6 +4695,12 @@ let Predicates = [UseSSE3] in {
 // SSE3 - Replicate Double FP - MOVDDUP
 //===---------------------------------------------------------------------===//
 
+// FIXME: Improve MOVDDUP/BROADCAST reg/mem scheduling itineraries. 
+let Sched = WriteFShuffle in
+def SSE_MOVDDUP : OpndItins<
+  IIC_SSE_MOV_LH, IIC_SSE_MOV_LH
+>;
+
 multiclass sse3_replicate_dfp<string OpcodeStr> {
 def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),




More information about the llvm-commits mailing list