[llvm] r280465 - [AVX-512] Add more patterns for masked and broadcasted logical operations where the select or broadcast has a floating point type.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 1 22:29:13 PDT 2016


Author: ctopper
Date: Fri Sep  2 00:29:13 2016
New Revision: 280465

URL: http://llvm.org/viewvc/llvm-project?rev=280465&view=rev
Log:
[AVX-512] Add more patterns for masked and broadcasted logical operations where the select or broadcast has a floating point type.

These patterns are required before the masked floating-point logical operation intrinsics can be removed and replaced with native IR.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-arith.ll
    llvm/trunk/test/CodeGen/X86/vec_fabs.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Sep  2 00:29:13 2016
@@ -4238,215 +4238,72 @@ defm VXOR  : avx512_fp_binop_p<0x57, "vx
                                SSE_ALU_ITINS_P, 1>;
 
 // Patterns catch floating point selects with bitcasted integer logic ops.
-let Predicates = [HasVLX] in {
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
-            VR128X:$src2)>;
-
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
+                                      X86VectorVTInfo _, Predicate prd> {
+let Predicates = [prd] in {
+  // Masked register-register logical operations.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, _.RC:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+             _.RC:$src2)>;
+  // Masked register-memory logical operations.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
+                                         (load addr:$src2)))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+             addr:$src2)>;
+  // Register-broadcast logical operations.
+  def : Pat<(_.i64VT (OpNode _.RC:$src1,
+                      (bitconvert (_.VT (X86VBroadcast
+                                         (_.ScalarLdFrag addr:$src2)))))),
+            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert
+                    (_.i64VT (OpNode _.RC:$src1,
+                              (bitconvert (_.VT
+                                           (X86VBroadcast
+                                            (_.ScalarLdFrag addr:$src2))))))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert
+                    (_.i64VT (OpNode _.RC:$src1,
+                              (bitconvert (_.VT
+                                           (X86VBroadcast
+                                            (_.ScalarLdFrag addr:$src2))))))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+}
 }
 
-let Predicates = [HasAVX512] in {
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
+  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
+  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
+  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
+  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
+  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
+  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
 }
 
+defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
+defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
+defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
+defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
+
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _> {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),

Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Fri Sep  2 00:29:13 2016
@@ -985,20 +985,17 @@ define <8 x double> @test_maskz_broadcas
 define <16 x float>  @test_fxor(<16 x float> %a) {
 ; AVX512F-LABEL: test_fxor:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_fxor:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_fxor:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: test_fxor:
@@ -1051,20 +1048,17 @@ define <8 x float>  @test_fxor_8f32(<8 x
 define <8 x double> @fabs_v8f64(<8 x double> %p)
 ; AVX512F-LABEL: fabs_v8f64:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: fabs_v8f64:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: fabs_v8f64:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: fabs_v8f64:
@@ -1085,20 +1079,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8
 define <16 x float> @fabs_v16f32(<16 x float> %p)
 ; AVX512F-LABEL: fabs_v16f32:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: fabs_v16f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: fabs_v16f32:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: fabs_v16f32:

Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Fri Sep  2 00:29:13 2016
@@ -145,8 +145,7 @@ define <8 x double> @fabs_v8f64(<8 x dou
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f64:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastsd {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -163,8 +162,7 @@ define <8 x double> @fabs_v8f64(<8 x dou
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f64:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -186,8 +184,7 @@ define <16 x float> @fabs_v16f32(<16 x f
 ;
 ; X32_AVX512VL-LABEL: fabs_v16f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v16f32:
@@ -204,8 +201,7 @@ define <16 x float> @fabs_v16f32(<16 x f
 ;
 ; X64_AVX512VL-LABEL: fabs_v16f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v16f32:




More information about the llvm-commits mailing list