[llvm] [X86][AVX512] Use comx for compare (PR #113098)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 02:17:56 PDT 2024
https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/113098
>From c8792ad1fdf8f3402627a6b4a5c402715fadeecf Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Sun, 20 Oct 2024 11:28:34 -0700
Subject: [PATCH 1/4] update comef opt on 10_2
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++
llvm/lib/Target/X86/X86InstrAVX10.td | 23 +++++
llvm/test/CodeGen/X86/avx10_2-cmp.ll | 121 ++++++++++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 1 +
4 files changed, 154 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/avx10_2-cmp.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcb84add65d83e..22fcd3bf6bc8eb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49520,6 +49520,15 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+
+ // VCOMXSS simplifies conditional code sequence into single setcc node
+ // and a CC node, Earlier until COMI, it required 2 SETCC's
+ if (Subtarget.hasAVX10_2()) {
+ return getSETCC(
+ ((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
+ DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL,
+ DAG);
+ }
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..f9687897728382 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_
//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
+multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
+ SDPatternOperator OpNode, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Domain d, X86FoldableSchedWrite sched = WriteFComX>{
+ let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
+ def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in {
+ def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
+ EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ }
+}
+
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
string OpcodeStr,
Domain d,
@@ -1564,6 +1582,11 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
}
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+
+ defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
+ "vucomxss", f32mem, loadf32, SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
"vcomxsd", SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
new file mode 100644
index 00000000000000..8c134e21070b82
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X86
+
+define i1 @oeq(float %x, float %y) {
+; AVX10_2_X64-LABEL: oeq:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2_X64-NEXT: sete %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: oeq:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; AVX10_2_X86-NEXT: sete %al
+; AVX10_2_X86-NEXT: retl
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @une(float %x, float %y) {
+; AVX10_2_X64-LABEL: une:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2_X64-NEXT: setne %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: une:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; AVX10_2_X86-NEXT: setne %al
+; AVX10_2_X86-NEXT: retl
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+define i1 @ogt(float %x, float %y) {
+; AVX10_2_X64-LABEL: ogt:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vucomiss %xmm1, %xmm0
+; AVX10_2_X64-NEXT: seta %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: ogt:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
+; AVX10_2_X86-NEXT: seta %al
+; AVX10_2_X86-NEXT: retl
+ %1 = fcmp ogt float %x, %y
+ ret i1 %1
+}
+
+define i1 @oeq_mem(ptr %xp, ptr %yp) {
+; AVX10_2_X64-LABEL: oeq_mem:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
+; AVX10_2_X64-NEXT: sete %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: oeq_mem:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
+; AVX10_2_X86-NEXT: sete %al
+; AVX10_2_X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @une_mem(ptr %xp, ptr %yp) {
+; AVX10_2_X64-LABEL: une_mem:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
+; AVX10_2_X64-NEXT: setne %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: une_mem:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
+; AVX10_2_X86-NEXT: setne %al
+; AVX10_2_X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+
+define i1 @ogt_mem(ptr %xp, ptr %yp) {
+; AVX10_2_X64-LABEL: ogt_mem:
+; AVX10_2_X64: # %bb.0:
+; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X64-NEXT: vucomiss (%rsi), %xmm0
+; AVX10_2_X64-NEXT: seta %al
+; AVX10_2_X64-NEXT: retq
+;
+; AVX10_2_X86-LABEL: ogt_mem:
+; AVX10_2_X86: # %bb.0:
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX10_2_X86-NEXT: vucomiss (%eax), %xmm0
+; AVX10_2_X86-NEXT: seta %al
+; AVX10_2_X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp ogt float %x, %y
+ ret i1 %1
+}
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..e444c61354abe6 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1961,6 +1961,7 @@ static const X86FoldTableEntry Table1[] = {
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
{X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
{X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
{X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
{X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
{X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},
>From 28825606c5f538adb46eb354cf6eaacc9427fcbc Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Sun, 20 Oct 2024 11:40:05 -0700
Subject: [PATCH 2/4] update comment and format
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 22fcd3bf6bc8eb..f6c34ccc4b0454 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49521,8 +49521,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- // VCOMXSS simplifies conditional code sequence into single setcc node
- // and a CC node, Earlier until COMI, it required 2 SETCC's
+ // VCOMXSS simplifies conditional code sequence into single setcc node.
+ // Earlier until COMI, it required upto 2 SETCC's to test CC.
if (Subtarget.hasAVX10_2()) {
return getSETCC(
((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
>From 06ab8e750a583b844a5bdb63f37aec55ebd342a4 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Sun, 20 Oct 2024 11:47:58 -0700
Subject: [PATCH 3/4] address review comments
---
llvm/test/CodeGen/X86/avx10_2-cmp.ll | 154 +++++++++++++--------------
1 file changed, 77 insertions(+), 77 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
index 8c134e21070b82..29313fc7cc2593 100644
--- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -1,74 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X64
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
define i1 @oeq(float %x, float %y) {
-; AVX10_2_X64-LABEL: oeq:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
-; AVX10_2_X64-NEXT: sete %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: oeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: oeq:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
-; AVX10_2_X86-NEXT: sete %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: oeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
%1 = fcmp oeq float %x, %y
ret i1 %1
}
define i1 @une(float %x, float %y) {
-; AVX10_2_X64-LABEL: une:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
-; AVX10_2_X64-NEXT: setne %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: une:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: une:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
-; AVX10_2_X86-NEXT: setne %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: une:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
%1 = fcmp une float %x, %y
ret i1 %1
}
define i1 @ogt(float %x, float %y) {
-; AVX10_2_X64-LABEL: ogt:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vucomiss %xmm1, %xmm0
-; AVX10_2_X64-NEXT: seta %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: ogt:
+; X64: # %bb.0:
+; X64-NEXT: vucomiss %xmm1, %xmm0
+; X64-NEXT: seta %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: ogt:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
-; AVX10_2_X86-NEXT: seta %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: ogt:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: seta %al
+; X86-NEXT: retl
%1 = fcmp ogt float %x, %y
ret i1 %1
}
define i1 @oeq_mem(ptr %xp, ptr %yp) {
-; AVX10_2_X64-LABEL: oeq_mem:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
-; AVX10_2_X64-NEXT: sete %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: oeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: oeq_mem:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
-; AVX10_2_X86-NEXT: sete %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: oeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
%x = load float, ptr %xp
%y = load float, ptr %yp
%1 = fcmp oeq float %x, %y
@@ -76,21 +76,21 @@ define i1 @oeq_mem(ptr %xp, ptr %yp) {
}
define i1 @une_mem(ptr %xp, ptr %yp) {
-; AVX10_2_X64-LABEL: une_mem:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
-; AVX10_2_X64-NEXT: setne %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: une_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: une_mem:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
-; AVX10_2_X86-NEXT: setne %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: une_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
%x = load float, ptr %xp
%y = load float, ptr %yp
%1 = fcmp une float %x, %y
@@ -99,21 +99,21 @@ define i1 @une_mem(ptr %xp, ptr %yp) {
define i1 @ogt_mem(ptr %xp, ptr %yp) {
-; AVX10_2_X64-LABEL: ogt_mem:
-; AVX10_2_X64: # %bb.0:
-; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X64-NEXT: vucomiss (%rsi), %xmm0
-; AVX10_2_X64-NEXT: seta %al
-; AVX10_2_X64-NEXT: retq
+; X64-LABEL: ogt_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomiss (%rsi), %xmm0
+; X64-NEXT: seta %al
+; X64-NEXT: retq
;
-; AVX10_2_X86-LABEL: ogt_mem:
-; AVX10_2_X86: # %bb.0:
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX10_2_X86-NEXT: vucomiss (%eax), %xmm0
-; AVX10_2_X86-NEXT: seta %al
-; AVX10_2_X86-NEXT: retl
+; X86-LABEL: ogt_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomiss (%eax), %xmm0
+; X86-NEXT: seta %al
+; X86-NEXT: retl
%x = load float, ptr %xp
%y = load float, ptr %yp
%1 = fcmp ogt float %x, %y
>From 6d5f26cda85c89d61d1a39a15e34cccabb5c65f5 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Mon, 21 Oct 2024 02:17:38 -0700
Subject: [PATCH 4/4] update review comments for sh,sd and format
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 6 +
llvm/test/CodeGen/X86/avx10_2-cmp.ll | 194 +++++++++++++++++++-----
llvm/test/TableGen/x86-fold-tables.inc | 2 +
4 files changed, 165 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f6c34ccc4b0454..71983a7d7c7154 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49521,8 +49521,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- // VCOMXSS simplifies conditional code sequence into single setcc node.
- // Earlier until COMI, it required upto 2 SETCC's to test CC.
+ // VUCOMX* folds the condition-code sequence into a single SETCC
+ // node. With (U)COMI, up to 2 SETCCs were needed to test the CC.
if (Subtarget.hasAVX10_2()) {
return getSETCC(
((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index f9687897728382..367fd67bec5351 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1583,6 +1583,12 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+ defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
+ "vucomxsd", f64mem, loadf64, SSEPackedDouble>, // f64 compare: double domain, as VCOMXSDZ
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
+ "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
"vucomxss", f32mem, loadf32, SSEPackedSingle>,
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
index 29313fc7cc2593..62a187c3adc741 100644
--- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -2,66 +2,127 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
-define i1 @oeq(float %x, float %y) {
-; X64-LABEL: oeq:
+define i1 @hoeq(half %x, half %y) {
+; X64-LABEL: hoeq:
; X64: # %bb.0:
-; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: vucomxsh %xmm1, %xmm0
; X64-NEXT: sete %al
; X64-NEXT: retq
;
-; X86-LABEL: oeq:
+; X86-LABEL: hoeq:
; X86: # %bb.0:
-; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: retl
- %1 = fcmp oeq float %x, %y
+ %1 = fcmp oeq half %x, %y
ret i1 %1
}
-define i1 @une(float %x, float %y) {
-; X64-LABEL: une:
+define i1 @hune(half %x, half %y) {
+; X64-LABEL: hune:
; X64: # %bb.0:
-; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: vucomxsh %xmm1, %xmm0
; X64-NEXT: setne %al
; X64-NEXT: retq
;
-; X86-LABEL: une:
+; X86-LABEL: hune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @hoeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hoeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hoeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp oeq half %x, %y
+ ret i1 %1
+}
+
+define i1 @hune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @foeq(float %x, float %y) {
+; X64-LABEL: foeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: foeq:
; X86: # %bb.0:
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: setne %al
+; X86-NEXT: sete %al
; X86-NEXT: retl
- %1 = fcmp une float %x, %y
+ %1 = fcmp oeq float %x, %y
ret i1 %1
}
-define i1 @ogt(float %x, float %y) {
-; X64-LABEL: ogt:
+define i1 @fune(float %x, float %y) {
+; X64-LABEL: fune:
; X64: # %bb.0:
-; X64-NEXT: vucomiss %xmm1, %xmm0
-; X64-NEXT: seta %al
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
;
-; X86-LABEL: ogt:
+; X86-LABEL: fune:
; X86: # %bb.0:
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: seta %al
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
; X86-NEXT: retl
- %1 = fcmp ogt float %x, %y
+ %1 = fcmp une float %x, %y
ret i1 %1
}
-define i1 @oeq_mem(ptr %xp, ptr %yp) {
-; X64-LABEL: oeq_mem:
+define i1 @foeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: foeq_mem:
; X64: # %bb.0:
; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vucomxss (%rsi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: retq
;
-; X86-LABEL: oeq_mem:
+; X86-LABEL: foeq_mem:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -75,15 +136,15 @@ define i1 @oeq_mem(ptr %xp, ptr %yp) {
ret i1 %1
}
-define i1 @une_mem(ptr %xp, ptr %yp) {
-; X64-LABEL: une_mem:
+define i1 @fune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: fune_mem:
; X64: # %bb.0:
; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vucomxss (%rsi), %xmm0
; X64-NEXT: setne %al
; X64-NEXT: retq
;
-; X86-LABEL: une_mem:
+; X86-LABEL: fune_mem:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -97,25 +158,80 @@ define i1 @une_mem(ptr %xp, ptr %yp) {
ret i1 %1
}
+define i1 @doeq(double %x, double %y) {
+; X64-LABEL: doeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
-define i1 @ogt_mem(ptr %xp, ptr %yp) {
-; X64-LABEL: ogt_mem:
+define i1 @dune(double %x, double %y) {
+; X64-LABEL: dune:
; X64: # %bb.0:
-; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vucomiss (%rsi), %xmm0
-; X64-NEXT: seta %al
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
;
-; X86-LABEL: ogt_mem:
+; X86-LABEL: dune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une double %x, %y
+ ret i1 %1
+}
+
+define i1 @doeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: doeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq_mem:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vucomiss (%eax), %xmm0
-; X86-NEXT: seta %al
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: sete %al
; X86-NEXT: retl
- %x = load float, ptr %xp
- %y = load float, ptr %yp
- %1 = fcmp ogt float %x, %y
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
+
+define i1 @dune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: dune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: dune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp une double %x, %y
ret i1 %1
}
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index e444c61354abe6..fd6ee37d27e147 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1959,7 +1959,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0},
{X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0},
{X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
{X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
{X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
More information about the llvm-commits
mailing list