[llvm] [X86][AVX512] Use comx for compare (PR #113567)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 27 21:46:38 PDT 2024
https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/113567
>From 09d1f74552c1f07ff9d7e1151b8e6e05a1801093 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Wed, 23 Oct 2024 22:29:30 -0700
Subject: [PATCH 1/8] update compare opt
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++++++++
llvm/lib/Target/X86/X86InstrAVX10.td | 28 +++++++++++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 3 +++
3 files changed, 44 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcb84add65d83e..062d4baf99ffa9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1056,6 +1056,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if(Subtarget.hasAVX10_2_512()){
+ for (auto FVT : { MVT::f16, MVT::f32, MVT::f64 }) {
+ setOperationAction(ISD::SETCC, FVT, Custom);
+ }
+ }
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
@@ -49520,6 +49525,14 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ // VCOMXSS simplifies conditional code sequence into single setcc
+ // node. Earlier until COMI, it required upto 2 SETCC's to test CC.
+ if (Subtarget.hasAVX10_2()) {
+ return getSETCC(
+ ((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
+ DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL,
+ DAG);
+ }
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..c67ef49940e513 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_
//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
+multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
+ SDPatternOperator OpNode, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Domain d, X86FoldableSchedWrite sched = WriteFComX>{
+ let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
+ def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in {
+ def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
+ EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ }
+}
+
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
string OpcodeStr,
Domain d,
@@ -1564,6 +1582,16 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
}
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+
+ defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
+ "vucomxsd", f64mem, loadf64, SSEPackedSingle>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
+ "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
+ "vucomxss", f32mem, loadf32, SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
"vcomxsd", SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..fd6ee37d27e147 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = {
{X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0},
{X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0},
{X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
{X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
{X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
{X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},
>From 44baff27402285d5294935a2ca8f2edccc992d79 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Wed, 23 Oct 2024 22:29:54 -0700
Subject: [PATCH 2/8] add test
---
llvm/test/CodeGen/X86/avx10_2-cmp.ll | 237 +++++++++++++++++++++++++++
1 file changed, 237 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/avx10_2-cmp.ll
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
new file mode 100644
index 00000000000000..62a187c3adc741
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
+
+define i1 @hoeq(half %x, half %y) {
+; X64-LABEL: hoeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsh %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hoeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq half %x, %y
+ ret i1 %1
+}
+
+define i1 @hune(half %x, half %y) {
+; X64-LABEL: hune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsh %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @hoeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hoeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hoeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp oeq half %x, %y
+ ret i1 %1
+}
+
+define i1 @hune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @foeq(float %x, float %y) {
+; X64-LABEL: foeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: foeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @fune(float %x, float %y) {
+; X64-LABEL: fune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: fune:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+define i1 @foeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: foeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: foeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @fune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: fune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: fune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+define i1 @doeq(double %x, double %y) {
+; X64-LABEL: doeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
+
+define i1 @dune(double %x, double %y) {
+; X64-LABEL: dune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: dune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une double %x, %y
+ ret i1 %1
+}
+
+define i1 @doeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: doeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
+
+define i1 @dune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: dune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: dune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp une double %x, %y
+ ret i1 %1
+}
>From d242ba8c2e728375c43270a3ba6672b62ea51265 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Thu, 24 Oct 2024 03:32:18 -0700
Subject: [PATCH 3/8] update mod
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 062d4baf99ffa9..6792885a405a5c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -219,8 +219,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// SETOEQ and SETUNE require checking two conditions.
for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
- setCondCodeAction(ISD::SETOEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
+ setCondCodeAction(ISD::SETOEQ, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand);
}
// Integer absolute.
@@ -1056,11 +1056,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- if(Subtarget.hasAVX10_2_512()){
- for (auto FVT : { MVT::f16, MVT::f32, MVT::f64 }) {
- setOperationAction(ISD::SETCC, FVT, Custom);
- }
- }
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
@@ -2447,7 +2442,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
}
-
+
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
@@ -24078,6 +24073,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
+ if(Subtarget.hasAVX10_2_512()){
+ if( CC == ISD::SETOEQ || CC == ISD::SETUNE){
+ auto NewCC = (CC == ISD:::SETOEQ) ? X86::COND_E : (X86::COND_NE);
+ return getSETCC(NewCC,
+ DAG.getNode(X86ISD::UCOMX,
+ dl, MVT::i32, Op0, Op1), dl, DAG);
+ }
+ }
// Handle floating point.
X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG);
if (CondCode == X86::COND_INVALID)
>From f56ad5c6b823c1f53df16d826cee8c71f29596ff Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Thu, 24 Oct 2024 06:21:00 -0700
Subject: [PATCH 4/8] add legal CC in 10.2
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++++--------------
1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6792885a405a5c..42c8175ccc92cb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -218,10 +218,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
- for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
- setCondCodeAction(ISD::SETOEQ, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand);
- setCondCodeAction(ISD::SETUNE, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand);
+ for (auto VT : {MVT::f32, MVT::f64}) {
+ setCondCodeAction(ISD::SETOEQ, VT,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
+ setCondCodeAction(ISD::SETUNE, VT,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
}
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Integer absolute.
if (Subtarget.canUseCMOV()) {
@@ -2292,8 +2296,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f16,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f16,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
@@ -24073,12 +24079,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
- if(Subtarget.hasAVX10_2_512()){
- if( CC == ISD::SETOEQ || CC == ISD::SETUNE){
- auto NewCC = (CC == ISD:::SETOEQ) ? X86::COND_E : (X86::COND_NE);
- return getSETCC(NewCC,
- DAG.getNode(X86ISD::UCOMX,
- dl, MVT::i32, Op0, Op1), dl, DAG);
+ if (Subtarget.hasAVX10_2_512()) {
+ if (CC == ISD::SETOEQ || CC == ISD::SETUNE) {
+ auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE);
+ return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1),
+ dl, DAG);
}
}
// Handle floating point.
@@ -49528,14 +49533,6 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- // VCOMXSS simplifies conditional code sequence into single setcc
- // node. Earlier until COMI, it required upto 2 SETCC's to test CC.
- if (Subtarget.hasAVX10_2()) {
- return getSETCC(
- ((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
- DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL,
- DAG);
- }
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
>From cdd0050d810c397416807387cff22dbcdad5e7d0 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Thu, 24 Oct 2024 06:28:58 -0700
Subject: [PATCH 5/8] remove space
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 42c8175ccc92cb..7ff5f230fbfe60 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2448,7 +2448,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
}
-
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
>From 6aa7edfe5694d3de8dde99a552dd15acc3312ab9 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Thu, 24 Oct 2024 10:45:53 -0700
Subject: [PATCH 6/8] update review comment
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++--------
llvm/lib/Target/X86/X86InstrAVX10.td | 3 +--
llvm/test/CodeGen/X86/avx10_2-cmp.ll | 4 ++--
3 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7ff5f230fbfe60..0da021e9b533d9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -218,15 +218,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
- for (auto VT : {MVT::f32, MVT::f64}) {
- setCondCodeAction(ISD::SETOEQ, VT,
- Subtarget.hasAVX10_2() ? Custom : Expand);
- setCondCodeAction(ISD::SETUNE, VT,
- Subtarget.hasAVX10_2() ? Custom : Expand);
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
+ setCondCodeAction(ISD::SETOEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
}
- setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+ if (Subtarget.hasAVX10_2()) {
+ for (auto VT : {MVT::f32, MVT::f64}) {
+ setCondCodeAction(ISD::SETOEQ, VT, Custom);
+ setCondCodeAction(ISD::SETUNE, VT, Custom);
+ }
+ }
// Integer absolute.
if (Subtarget.canUseCMOV()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
@@ -24078,7 +24080,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
- if (Subtarget.hasAVX10_2_512()) {
+ if (Subtarget.hasAVX10_2()) {
if (CC == ISD::SETOEQ || CC == ISD::SETUNE) {
auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE);
return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1),
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index c67ef49940e513..1a1255532b773f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1582,9 +1582,8 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
}
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
-
defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
- "vucomxsd", f64mem, loadf64, SSEPackedSingle>,
+ "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
"vucomxsh", f16mem, loadf16, SSEPackedSingle>,
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
index 62a187c3adc741..de0bec7ea2695a 100644
--- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
define i1 @hoeq(half %x, half %y) {
; X64-LABEL: hoeq:
>From 92c94714bd3504e03e04516ce87585ccd554a6bd Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Thu, 24 Oct 2024 22:25:18 -0700
Subject: [PATCH 7/8] restore blank line
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0da021e9b533d9..d691346020a02a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2450,6 +2450,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
}
+
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
>From da9493cccf77f1cce2388958ee1c5272aca44afa Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Sun, 27 Oct 2024 21:45:58 -0700
Subject: [PATCH 8/8] address review comments; revert faulty commit
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d691346020a02a..cd17ad5571572c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -224,7 +224,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasAVX10_2()) {
- for (auto VT : {MVT::f32, MVT::f64}) {
+ for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
setCondCodeAction(ISD::SETUNE, VT, Custom);
}
@@ -2298,10 +2298,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f16,
- Subtarget.hasAVX10_2() ? Custom : Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f16,
- Subtarget.hasAVX10_2() ? Custom : Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
More information about the llvm-commits
mailing list