[llvm] [X86][AVX10.2] Support AVX10.2-COMEF new instructions. (PR #108063)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 10 10:47:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Mahesh-Attarde (mahesh-attarde)
<details>
<summary>Changes</summary>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
---
Patch is 59.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108063.diff
12 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+62-25)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+4)
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+42)
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+2-1)
- (modified) llvm/test/CodeGen/X86/comi-flags.ll (+154-83)
- (added) llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt (+174)
- (added) llvm/test/MC/Disassembler/X86/avx512-com-ef-64.txt (+171)
- (added) llvm/test/MC/X86/avx512-com-ef-32-att.s (+170)
- (added) llvm/test/MC/X86/avx512-com-ef-32-intel.s (+170)
- (added) llvm/test/MC/X86/avx512-com-ef-64-att.s (+170)
- (added) llvm/test/MC/X86/avx512-com-ef-64-intel.s (+170)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+6)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1d466eee691c9..22d5e6a20c9d79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26060,32 +26060,67 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);
- SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ // For AVX10.2, Support EQ and NE
+ bool HasAVX10_2_COMX =
+ Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
+
+ // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16
+ auto SVT = LHS.getSimpleValueType();
+ bool HasAVX10_2_COMX_Ty =
+ (SVT == MVT::v2f64) || (SVT == MVT::v4f32) || (SVT == MVT::v8f16);
+
+ auto ComiOpCode = IntrData->Opc0;
+ auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
+
+ if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
+ ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
+
+ SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
+
SDValue SetCC;
- switch (CC) {
- case ISD::SETEQ: { // (ZF = 0 and PF = 0)
- SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
- SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
- break;
- }
- case ISD::SETNE: { // (ZF = 1 or PF = 1)
- SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
- SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
- break;
- }
- case ISD::SETGT: // (CF = 0 and ZF = 0)
- case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
- SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
- break;
- }
- case ISD::SETGE: // CF = 0
- case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
- SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
- break;
- default:
- llvm_unreachable("Unexpected illegal condition!");
+ if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETNE: { // (!ZF)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETGE:
+ case ISD::SETLE:
+ default:
+ llvm_unreachable("Un-implemented condition!");
+ }
+ } else {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF = 0 and PF = 0)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+ break;
+ }
+ case ISD::SETNE: { // (ZF = 1 or PF = 1)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+ break;
+ }
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
+ case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
+ SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGE: // CF = 0
+ case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
+ SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
+ break;
+ default:
+ llvm_unreachable("Unexpected illegal condition!");
+ }
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -33845,6 +33880,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 93d2b3e65742b2..cf9125dd9c3ccf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -87,6 +87,10 @@ namespace llvm {
COMI,
UCOMI,
+ // X86 compare with Intrinsics similar to COMI
+ COMX,
+ UCOMX,
+
/// X86 bit-test instructions.
BT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index b0eb210b687b19..b2c93455c95de2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1225,3 +1225,45 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 COMEF instructions
+//-------------------------------------------------
+multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
+ string OpcodeStr,
+ Domain d,
+ X86FoldableSchedWrite sched = WriteFComX> {
+ let ExeDomain = d in {
+ def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in {
+ def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ }
+ }
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
+ defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
+ "vcomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
+ "vcomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
+ "vcomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
+ "vucomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
+ "vucomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
+ "vucomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 59bfd2bcbabc26..fb6920042734a1 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
-
+def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
+def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 8b7a089f0ce872..6f520aa57dcd09 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
;
; SSE
@@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2,
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_eq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %al
-; AVX-NEXT: sete %cl
-; AVX-NEXT: testb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_eq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %al
+; NO-AVX10_2-NEXT: sete %cl
+; NO-AVX10_2-NEXT: testb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_eq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: je foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
@@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_neq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_neq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setp %al
+; NO-AVX10_2-NEXT: setne %cl
+; NO-AVX10_2-NEXT: orb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_neq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
new file mode 100644
index 00000000000000..f762601c9f6221
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
@@ -0,0 +1,174 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcomxsd %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2f,0xd3
+
+# ATT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108063
More information about the llvm-commits
mailing list