[llvm] r333473 - [X86] Remove some of the extractelts from the new MOVSS+FMA patterns.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue May 29 15:52:10 PDT 2018


Author: ctopper
Date: Tue May 29 15:52:09 2018
New Revision: 333473

URL: http://llvm.org/viewvc/llvm-project?rev=333473&view=rev
Log:
[X86] Remove some of the extractelts from the new MOVSS+FMA patterns.

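We only need the extractelt that corresponds to the register we're trying to insert back into ($src1). We can't guarantee that the extractelts on the other two operands are still present: depending on how those operands were produced, they may already have been optimized out, in which case the old patterns would fail to match.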

So instead just look for a scalar FR32/FR64 input (the info class's FRC in the AVX-512 patterns) for those operands and emit a COPY_TO_REGCLASS to VR128 (VR128X for AVX-512) in the output pattern. This matches what we do for ADD/SUB/MUL/DIV.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFMA.td

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=333473&r1=333472&r2=333473&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue May 29 15:52:09 2018
@@ -6697,62 +6697,69 @@ defm VFNMADD : avx512_fma3s<0xAD, 0xBD,
 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
                             X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
 
-multiclass avx512_scalar_fma_patterns<SDNode Op, string Prefix, string Suffix, SDNode Move,
-                                      ValueType VT, ValueType EltVT, PatLeaf ZeroFP> {
+multiclass avx512_scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
+                                      SDNode Move, X86VectorVTInfo _,
+                                      PatLeaf ZeroFP> {
   let Predicates = [HasAVX512] in {
-    def : Pat<(VT (Move (VT VR128X:$src1), (VT (scalar_to_vector
-                (Op (EltVT (extractelt (VT VR128X:$src2), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src1), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src3), (iPTR 0)))))))),
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+                (Op _.FRC:$src2,
+                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                    _.FRC:$src3))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
-               VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
+               VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
 
-    def : Pat<(VT (Move (VT VR128X:$src1), (VT (scalar_to_vector
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
-                (Op (EltVT (extractelt (VT VR128X:$src2), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src1), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src3), (iPTR 0)))),
-                (EltVT (extractelt (VT VR128X:$src1), (iPTR 0)))))))),
+                (Op _.FRC:$src2,
+                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                    _.FRC:$src3),
+                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
-               VR128X:$src1, VK1WM:$mask, VR128X:$src2, VR128X:$src3)>;
+               VR128X:$src1, VK1WM:$mask,
+               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
 
-    def : Pat<(VT (Move (VT VR128X:$src1), (VT (scalar_to_vector
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
-                (Op (EltVT (extractelt (VT VR128X:$src2), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src3), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src1), (iPTR 0)))),
-                (EltVT (extractelt (VT VR128X:$src1), (iPTR 0)))))))),
+                (Op _.FRC:$src2, _.FRC:$src3,
+                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
-               VR128X:$src1, VK1WM:$mask, VR128X:$src2, VR128X:$src3)>;
+               VR128X:$src1, VK1WM:$mask,
+               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
 
-    def : Pat<(VT (Move (VT VR128X:$src1), (VT (scalar_to_vector
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
-                (Op (EltVT (extractelt (VT VR128X:$src2), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src1), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128X:$src3), (iPTR 0)))),
-                (EltVT ZeroFP)))))),
+                (Op _.FRC:$src2,
+                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                    _.FRC:$src3),
+                (_.EltVT ZeroFP)))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
-               VR128X:$src1, VK1WM:$mask, VR128X:$src2, VR128X:$src3)>;
+               VR128X:$src1, VK1WM:$mask,
+               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
   }
 }
 
 defm : avx512_scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss,
-                                  v4f32, f32, fp32imm0>;
+                                  v4f32x_info, fp32imm0>;
 defm : avx512_scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss,
-                                  v4f32, f32, fp32imm0>;
+                                  v4f32x_info, fp32imm0>;
 defm : avx512_scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss,
-                                  v4f32, f32, fp32imm0>;
+                                  v4f32x_info, fp32imm0>;
 defm : avx512_scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss,
-                                  v4f32, f32, fp32imm0>;
+                                  v4f32x_info, fp32imm0>;
 
 defm : avx512_scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd,
-                                  v2f64, f64, fp64imm0>;
+                                  v2f64x_info, fp64imm0>;
 defm : avx512_scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd,
-                                  v2f64, f64, fp64imm0>;
+                                  v2f64x_info, fp64imm0>;
 defm : avx512_scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd,
-                                  v2f64, f64, fp64imm0>;
+                                  v2f64x_info, fp64imm0>;
 defm : avx512_scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd,
-                                  v2f64, f64, fp64imm0>;
+                                  v2f64x_info, fp64imm0>;
 
 //===----------------------------------------------------------------------===//
 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA

Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=333473&r1=333472&r2=333473&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Tue May 29 15:52:09 2018
@@ -364,27 +364,29 @@ defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "
 defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub,
                      SchedWriteFMA.Scl>, VEX_LIG;
 
-multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix, SDNode Move,
-                               ValueType VT, ValueType EltVT> {
+multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
+                               SDNode Move, ValueType VT, ValueType EltVT,
+                               RegisterClass RC> {
   let Predicates = [HasFMA, NoAVX512] in {
     def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
-                (Op (EltVT (extractelt (VT VR128:$src2), (iPTR 0))),
+                (Op RC:$src2,
                     (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
-                    (EltVT (extractelt (VT VR128:$src3), (iPTR 0)))))))),
+                    RC:$src3))))),
               (!cast<I>(Prefix#"213"#Suffix#"r_Int")
-               VR128:$src1, VR128:$src2, VR128:$src3)>;
+               VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
+               (COPY_TO_REGCLASS RC:$src3, VR128))>;
   }
 }
 
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32>;
-defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32>;
-defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32>;
-defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32>;
+defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32>;
+defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32>;
+defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32>;
+defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32>;
 
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64>;
-defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64>;
-defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64>;
-defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64>;
+defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64>;
+defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64>;
+defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64>;
+defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64>;
 
 //===----------------------------------------------------------------------===//
 // FMA4 - AMD 4 operand Fused Multiply-Add instructions




More information about the llvm-commits mailing list