[llvm] r314598 - [AVX-512] Add patterns to make fp compare instructions commutable during isel.
Author: ctopper
Date: Sat Sep 30 10:02:39 2017
New Revision: 314598
URL: http://llvm.org/viewvc/llvm-project?rev=314598&view=rev
Log:
[AVX-512] Add patterns to make fp compare instructions commutable during isel.
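
The compare immediates handled here -- EQ (0x00), UNORD (0x03), NEQ (0x04), and ORD (0x07) -- are symmetric in their operands, so when a load feeds the first operand of the compare, isel can swap the operands and still fold the load into the instruction's memory form. The new CommutableCMPCC PatLeaf (defined in X86InstrSSE.td below) restricts the commuted patterns to exactly those four immediates. A minimal IR example of the shape these patterns now match, lifted from the updated test file (the function signature is reconstructed from the test body):

; The load feeds the first fcmp operand; oeq is commutable, so isel can
; now fold the load into the memory operand of vcmpeqps directly.
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}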
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=314598&r1=314597&r2=314598&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Sep 30 10:02:39 2017
@@ -2077,7 +2077,33 @@ multiclass avx512_vcmp_common<X86VectorV
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
}
- }
+ }
+
+ // Patterns for selecting with loads in other operand.
+ def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc),
+ (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
+ imm:$cc)>;
+
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
+ (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2,
+ imm:$cc)>;
+
+ def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1), CommutableCMPCC:$cc),
+ (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
+ imm:$cc)>;
+
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
+ (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2,
+ imm:$cc)>;
}
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
@@ -2119,6 +2145,17 @@ defm VCMPPS : avx512_vcmp<avx512vl_f32_i
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+// Patterns to select fp compares with load as first operand.
+let Predicates = [HasAVX512] in {
+ def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
+}
+
// ----------------------------------------------------------------
// FPClass
//handle fpclass instruction mask = op(reg_scalar,imm)
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=314598&r1=314597&r2=314598&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Sep 30 10:02:39 2017
@@ -2308,6 +2308,58 @@ let Constraints = "$src1 = $dst" in {
SSEPackedDouble, memopv2f64, SSE_ALU_F64P>, PD;
}
+def CommutableCMPCC : PatLeaf<(imm), [{
+ return (N->getZExtValue() == 0x00 || N->getZExtValue() == 0x03 ||
+ N->getZExtValue() == 0x04 || N->getZExtValue() == 0x07);
+}]>;
+
+// Patterns to select compares with loads in first operand.
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
+ CommutableCMPCC:$cc)),
+ (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
+ CommutableCMPCC:$cc)),
+ (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE1] in {
+ def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
+ (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+ def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
+ CommutableCMPCC:$cc)),
+ (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
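
The test changes below add an AVX-512 RUN line and pass -disable-peephole on every RUN line, presumably so that the load folding shown in the CHECK lines comes from the new isel patterns themselves rather than from the post-isel peephole optimizer, which can also fold loads and would otherwise mask the difference.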
Modified: llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-fcmp.ll?rev=314598&r1=314597&r2=314598&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-fcmp.ll Sat Sep 30 10:02:39 2017
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512
;
; Float Comparisons
@@ -17,6 +18,13 @@ define <4 x i32> @commute_cmpps_eq(<4 x
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_eq:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp oeq <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -33,6 +41,13 @@ define <4 x i32> @commute_cmpps_ne(<4 x
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ne:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp une <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -49,6 +64,13 @@ define <4 x i32> @commute_cmpps_ord(<4 x
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ord:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ord <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -65,6 +87,13 @@ define <4 x i32> @commute_cmpps_uno(<4 x
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_uno:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp uno <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -88,6 +117,16 @@ define <4 x i32> @commute_cmpps_ueq(<4 x
; AVX-NEXT: vcmpunordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ueq:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %xmm1
+; AVX512-NEXT: vcmpeqps %xmm0, %xmm1, %k0
+; AVX512-NEXT: vcmpunordps %xmm0, %xmm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ueq <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -111,6 +150,15 @@ define <4 x i32> @commute_cmpps_one(<4 x
; AVX-NEXT: vcmpordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %xmm1
+; AVX512-NEXT: vcmpordps %xmm0, %xmm1, %k1
+; AVX512-NEXT: vcmpneqps %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp one <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -130,6 +178,14 @@ define <4 x i32> @commute_cmpps_lt(<4 x
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_lt:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %xmm1
+; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp olt <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -149,6 +205,14 @@ define <4 x i32> @commute_cmpps_le(<4 x
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_le:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %xmm1
+; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ole <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -166,6 +230,13 @@ define <8 x i32> @commute_cmpps_eq_ymm(<
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_eq_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp oeq <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -183,6 +254,13 @@ define <8 x i32> @commute_cmpps_ne_ymm(<
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ne_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp une <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -200,6 +278,13 @@ define <8 x i32> @commute_cmpps_ord_ymm(
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ord_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ord <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -217,6 +302,13 @@ define <8 x i32> @commute_cmpps_uno_ymm(
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_uno_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp uno <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -245,6 +337,16 @@ define <8 x i32> @commute_cmpps_ueq_ymm(
; AVX-NEXT: vcmpunordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_ueq_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %ymm1
+; AVX512-NEXT: vcmpeqps %ymm0, %ymm1, %k0
+; AVX512-NEXT: vcmpunordps %ymm0, %ymm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ueq <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -273,6 +375,15 @@ define <8 x i32> @commute_cmpps_one_ymm(
; AVX-NEXT: vcmpordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_one_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %ymm1
+; AVX512-NEXT: vcmpordps %ymm0, %ymm1, %k1
+; AVX512-NEXT: vcmpneqps %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp one <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -295,6 +406,14 @@ define <8 x i32> @commute_cmpps_lt_ymm(<
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_lt_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %ymm1
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp olt <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -317,6 +436,14 @@ define <8 x i32> @commute_cmpps_le_ymm(<
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmpps_le_ymm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %ymm1
+; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ole <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -338,6 +465,13 @@ define <2 x i64> @commute_cmppd_eq(<2 x
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_eq:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp oeq <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -354,6 +488,13 @@ define <2 x i64> @commute_cmppd_ne(<2 x
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ne:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp une <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -370,6 +511,13 @@ define <2 x i64> @commute_cmppd_ord(<2 x
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ord:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ord <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -393,6 +541,16 @@ define <2 x i64> @commute_cmppd_ueq(<2 x
; AVX-NEXT: vcmpunordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ueq:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %xmm1
+; AVX512-NEXT: vcmpeqpd %xmm0, %xmm1, %k0
+; AVX512-NEXT: vcmpunordpd %xmm0, %xmm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ueq <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -416,6 +574,15 @@ define <2 x i64> @commute_cmppd_one(<2 x
; AVX-NEXT: vcmpordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %xmm1
+; AVX512-NEXT: vcmpordpd %xmm0, %xmm1, %k1
+; AVX512-NEXT: vcmpneqpd %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp one <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -432,6 +599,13 @@ define <2 x i64> @commute_cmppd_uno(<2 x
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_uno:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp uno <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -451,6 +625,14 @@ define <2 x i64> @commute_cmppd_lt(<2 x
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_lt:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %xmm1
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp olt <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -470,6 +652,14 @@ define <2 x i64> @commute_cmppd_le(<2 x
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_le:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %xmm1
+; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ole <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -487,6 +677,13 @@ define <4 x i64> @commute_cmppd_eq_ymmm(
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_eq_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp oeq <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -504,6 +701,13 @@ define <4 x i64> @commute_cmppd_ne_ymmm(
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ne_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp une <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -521,6 +725,13 @@ define <4 x i64> @commute_cmppd_ord_ymmm
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ord_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ord <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -538,6 +749,13 @@ define <4 x i64> @commute_cmppd_uno_ymmm
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_uno_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp uno <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -566,6 +784,16 @@ define <4 x i64> @commute_cmppd_ueq_ymmm
; AVX-NEXT: vcmpunordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_ueq_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %ymm1
+; AVX512-NEXT: vcmpeqpd %ymm0, %ymm1, %k0
+; AVX512-NEXT: vcmpunordpd %ymm0, %ymm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ueq <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -594,6 +822,15 @@ define <4 x i64> @commute_cmppd_one_ymmm
; AVX-NEXT: vcmpordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_one_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %ymm1
+; AVX512-NEXT: vcmpordpd %ymm0, %ymm1, %k1
+; AVX512-NEXT: vcmpneqpd %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp one <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -616,6 +853,14 @@ define <4 x i64> @commute_cmppd_lt_ymmm(
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_lt_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %ymm1
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp olt <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -638,6 +883,14 @@ define <4 x i64> @commute_cmppd_le_ymmm(
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
+;
+; AVX512-LABEL: commute_cmppd_le_ymmm:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %ymm1
+; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ole <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
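
A note on the AVX512 CHECK lines above: with AVX512VL a vector fcmp produces a mask register, so the sext result is materialized with an all-ones vpcmpeqd plus a zero-masked vmovdqa32/vmovdqa64. For the commutable predicates (eq, ne, ord, uno) the compare folds the load directly (e.g. vcmpeqps (%rdi), %xmm0, %k1), while the non-commutable (lt, le) and composite (ueq, one) cases still load into a register first.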