[llvm] r280195 - [AVX-512] Add patterns to select masked logical operations if the select has a floating point type.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 30 22:37:53 PDT 2016


Author: ctopper
Date: Wed Aug 31 00:37:52 2016
New Revision: 280195

URL: http://llvm.org/viewvc/llvm-project?rev=280195&view=rev
Log:
[AVX-512] Add patterns to select masked logical operations if the select has a floating point type.

This is needed in order to replace the masked floating point logical op intrinsics with native IR.
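
For reference, the kind of native IR this enables looks like the masked AND test case sketched below: the floating point inputs are bitcast to an integer vector, the logic op is done there, the result is bitcast back to the floating point type, and a select against the passthru (or a zero vector for the zeroing forms) applies the mask. This is a minimal sketch modeled on the test cases in this patch; the function name is illustrative.

; Sketch: with the new patterns this selects to a single masked vpandq
; (or vandpd on SKX) instead of a vpandq followed by vblendmpd.
define <8 x double> @mask_and_pd_sketch(<8 x double> %__W, i8 zeroext %__U,
                                        <8 x double> %__A, <8 x double> %__B) {
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %and.i = and <8 x i64> %0, %1
  %2 = bitcast <8 x i64> %and.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  ; A vselect with a floating point result type whose true operand is a
  ; bitcasted integer logic op -- this is what the added patterns match.
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
  ret <8 x double> %4
}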

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-logic.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=280195&r1=280194&r2=280195&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Aug 31 00:37:52 2016
@@ -4235,6 +4235,216 @@ defm VOR   : avx512_fp_binop_p<0x56, "vo
 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
                                SSE_ALU_ITINS_P, 1>;
 
+// Patterns catch floating point selects with bitcasted integer logic ops.
+let Predicates = [HasVLX] in {
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
+            VR128X:$src2)>;
+
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v4f32 (vselect VK4WM:$mask,
+                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
+
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
+                    VR128X:$src0)),
+            (VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
+                    (bitconvert (v4i32 immAllZerosV)))),
+            (VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
+
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v8f32 (vselect VK8WM:$mask,
+                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
+
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
+                    VR256X:$src0)),
+            (VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+  def : Pat<(v4f64 (vselect VK4WM:$mask,
+                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
+                    (bitconvert (v8i32 immAllZerosV)))),
+            (VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+}
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
+                     VR512:$src0)),
+            (VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
+                     VR512:$src0)),
+            (VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
+                     VR512:$src0)),
+            (VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
+                     VR512:$src0)),
+            (VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
+                     (bitconvert (v16i32 immAllZerosV)))),
+            (VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
+                     (bitconvert (v16i32 immAllZerosV)))),
+            (VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
+                     (bitconvert (v16i32 immAllZerosV)))),
+            (VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v16f32 (vselect VK16WM:$mask,
+                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
+                     (bitconvert (v16i32 immAllZerosV)))),
+            (VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
+
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
+                    VR512:$src0)),
+            (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
+                    VR512:$src0)),
+            (VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
+                    VR512:$src0)),
+            (VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
+                    VR512:$src0)),
+            (VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
+                    (bitconvert (v16i32 immAllZerosV)))),
+            (VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
+                    (bitconvert (v16i32 immAllZerosV)))),
+            (VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
+                    (bitconvert (v16i32 immAllZerosV)))),
+            (VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+  def : Pat<(v8f64 (vselect VK8WM:$mask,
+                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
+                    (bitconvert (v16i32 immAllZerosV)))),
+            (VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+}
+
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _> {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),

Modified: llvm/trunk/test/CodeGen/X86/avx512-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-logic.ll?rev=280195&r1=280194&r2=280195&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll Wed Aug 31 00:37:52 2016
@@ -498,16 +498,14 @@ entry:
 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_mask_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -522,16 +520,14 @@ entry:
 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_maskz_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -546,16 +542,14 @@ entry:
 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_mask_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpxord %zmm2, %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm1
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -570,16 +564,14 @@ entry:
 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_maskz_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpxord %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -594,16 +586,14 @@ entry:
 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_mask_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -618,16 +608,14 @@ entry:
 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_maskz_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -642,16 +630,14 @@ entry:
 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_mask_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpord %zmm1, %zmm2, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm1
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -666,16 +652,14 @@ entry:
 define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_maskz_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpord %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -690,16 +674,14 @@ entry:
 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_mask_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -714,16 +696,14 @@ entry:
 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_maskz_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -738,16 +718,14 @@ entry:
 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_mask_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpandd %zmm1, %zmm2, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm1
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -762,16 +740,14 @@ entry:
 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_maskz_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpandd %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -786,16 +762,14 @@ entry:
 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_mask_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -811,16 +785,14 @@ entry:
 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; KNL-LABEL: test_mm512_maskz_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x double> %__A to <8 x i64>
@@ -836,16 +808,14 @@ entry:
 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_mask_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    vpandnd %zmm2, %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_mask_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm1
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>
@@ -861,16 +831,14 @@ entry:
 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; KNL-LABEL: test_mm512_maskz_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpandnd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <16 x float> %__A to <16 x i32>

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll?rev=280195&r1=280194&r2=280195&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll Wed Aug 31 00:37:52 2016
@@ -222,16 +222,14 @@ entry:
 define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_mask_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %ymm2, %ymm1, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vandnpd %ymm2, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -248,16 +246,14 @@ entry:
 define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_maskz_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %ymm1, %ymm0, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -274,16 +270,14 @@ entry:
 define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_mask_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %xmm2, %xmm1, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vandnpd %xmm2, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -300,16 +294,14 @@ entry:
 define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_maskz_andnot_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_andnot_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -326,16 +318,14 @@ entry:
 define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_mask_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpandnd %ymm2, %ymm1, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %ymm2, %ymm1, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vandnps %ymm2, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -351,16 +341,14 @@ entry:
 define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_maskz_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %ymm1, %ymm0, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -376,16 +364,14 @@ entry:
 define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_mask_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpandnd %xmm2, %xmm1, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %xmm2, %xmm1, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vandnps %xmm2, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -402,16 +388,14 @@ entry:
 define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_maskz_andnot_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_andnot_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -428,16 +412,14 @@ entry:
 define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_mask_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %ymm1, %ymm2, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vandpd %ymm1, %ymm2, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -453,16 +435,14 @@ entry:
 define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_maskz_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %ymm0, %ymm1, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vandpd %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -478,16 +458,14 @@ entry:
 define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_mask_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %xmm1, %xmm2, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vandpd %xmm1, %xmm2, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -503,16 +481,14 @@ entry:
 define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_maskz_and_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_and_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandpd %xmm0, %xmm1, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vandpd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -528,16 +504,14 @@ entry:
 define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_mask_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpandd %ymm1, %ymm2, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %ymm1, %ymm2, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vandps %ymm1, %ymm2, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -552,16 +526,14 @@ entry:
 define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_maskz_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpandd %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vandps %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -576,16 +548,14 @@ entry:
 define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_mask_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpandd %xmm1, %xmm2, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %xmm1, %xmm2, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vandps %xmm1, %xmm2, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -601,16 +571,14 @@ entry:
 define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_maskz_and_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpandd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_and_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vandps %xmm0, %xmm1, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vandps %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -626,16 +594,14 @@ entry:
 define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_mask_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vxorpd %ymm2, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -651,16 +617,14 @@ entry:
 define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_maskz_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %ymm1, %ymm0, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -676,16 +640,14 @@ entry:
 define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_mask_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %xmm2, %xmm1, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vxorpd %xmm2, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -701,16 +663,14 @@ entry:
 define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_maskz_xor_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_xor_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -726,16 +686,14 @@ entry:
 define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_mask_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpxord %ymm2, %ymm1, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %ymm2, %ymm1, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vxorps %ymm2, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -750,16 +708,14 @@ entry:
 define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_maskz_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %ymm1, %ymm0, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -774,16 +730,14 @@ entry:
 define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_mask_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpxord %xmm2, %xmm1, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %xmm2, %xmm1, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vxorps %xmm2, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -799,16 +753,14 @@ entry:
 define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_maskz_xor_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_xor_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -824,16 +776,14 @@ entry:
 define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_mask_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %ymm1, %ymm2, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vorpd %ymm1, %ymm2, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -849,16 +799,14 @@ entry:
 define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; KNL-LABEL: test_mm256_maskz_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x double> %__A to <4 x i64>
@@ -874,16 +822,14 @@ entry:
 define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_mask_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %xmm1, %xmm2, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vorpd %xmm1, %xmm2, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -899,16 +845,14 @@ entry:
 define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; KNL-LABEL: test_mm_maskz_or_pd:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_or_pd:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <2 x double> %__A to <2 x i64>
@@ -924,16 +868,14 @@ entry:
 define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_mask_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    vpord %ymm1, %ymm2, %ymm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_mask_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %ymm1, %ymm2, %ymm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    vorps %ymm1, %ymm2, %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -948,16 +890,14 @@ entry:
 define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; KNL-LABEL: test_mm256_maskz_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    vpord %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm256_maskz_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <8 x float> %__A to <8 x i32>
@@ -972,16 +912,14 @@ entry:
 define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_mask_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm1
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    vpord %xmm1, %xmm2, %xmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_mask_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %xmm1, %xmm2, %xmm1
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    vorps %xmm1, %xmm2, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
@@ -997,16 +935,14 @@ entry:
 define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; KNL-LABEL: test_mm_maskz_or_ps:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vpord %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_mm_maskz_or_ps:
 ; SKX:       ## BB#0: ## %entry
-; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0
 ; SKX-NEXT:    kmovb %edi, %k1
-; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 entry:
   %0 = bitcast <4 x float> %__A to <4 x i32>
