[llvm] [X86][AVX10.2] Support AVX10.2-COMEF new instructions. (PR #108063)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 10 10:47:51 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Mahesh-Attarde (mahesh-attarde)

<details>
<summary>Changes</summary>

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

---

Patch is 59.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108063.diff


12 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+62-25) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+4) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+42) 
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+2-1) 
- (modified) llvm/test/CodeGen/X86/comi-flags.ll (+154-83) 
- (added) llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt (+174) 
- (added) llvm/test/MC/Disassembler/X86/avx512-com-ef-64.txt (+171) 
- (added) llvm/test/MC/X86/avx512-com-ef-32-att.s (+170) 
- (added) llvm/test/MC/X86/avx512-com-ef-32-intel.s (+170) 
- (added) llvm/test/MC/X86/avx512-com-ef-64-att.s (+170) 
- (added) llvm/test/MC/X86/avx512-com-ef-64-intel.s (+170) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+6) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1d466eee691c9..22d5e6a20c9d79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26060,32 +26060,67 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       if (CC == ISD::SETLT || CC == ISD::SETLE)
         std::swap(LHS, RHS);
 
-      SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+      // For AVX10.2, Support EQ and NE
+      bool HasAVX10_2_COMX =
+          Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
+
+      // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16
+      auto SVT = LHS.getSimpleValueType();
+      bool HasAVX10_2_COMX_Ty =
+          (SVT == MVT::v2f64) || (SVT == MVT::v4f32) || (SVT == MVT::v8f16);
+
+      auto ComiOpCode = IntrData->Opc0;
+      auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
+
+      if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
+        ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
+
+      SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
+
       SDValue SetCC;
-      switch (CC) {
-      case ISD::SETEQ: { // (ZF = 0 and PF = 0)
-        SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
-        SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
-        SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
-        break;
-      }
-      case ISD::SETNE: { // (ZF = 1 or PF = 1)
-        SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
-        SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
-        SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
-        break;
-      }
-      case ISD::SETGT: // (CF = 0 and ZF = 0)
-      case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
-        SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
-        break;
-      }
-      case ISD::SETGE: // CF = 0
-      case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
-        SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
-        break;
-      default:
-        llvm_unreachable("Unexpected illegal condition!");
+      if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) {
+        switch (CC) {
+        case ISD::SETEQ: { // (ZF)
+          SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+          break;
+        }
+        case ISD::SETNE: { // (!ZF)
+          SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+          break;
+        }
+        case ISD::SETGT:
+        case ISD::SETLT:
+        case ISD::SETGE:
+        case ISD::SETLE:
+        default:
+          llvm_unreachable("Un-implemented condition!");
+        }
+      } else {
+        switch (CC) {
+        case ISD::SETEQ: { // (ZF = 0 and PF = 0)
+          SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+          SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
+          SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+          break;
+        }
+        case ISD::SETNE: { // (ZF = 1 or PF = 1)
+          SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+          SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
+          SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+          break;
+        }
+        case ISD::SETGT:   // (CF = 0 and ZF = 0)
+        case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
+          SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
+          break;
+        }
+        case ISD::SETGE: // CF = 0
+        case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
+          SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
+          break;
+        default:
+          llvm_unreachable("Unexpected illegal condition!");
+        }
       }
       return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
     }
@@ -33845,6 +33880,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(STRICT_FCMPS)
   NODE_NAME_CASE(COMI)
   NODE_NAME_CASE(UCOMI)
+  NODE_NAME_CASE(COMX)
+  NODE_NAME_CASE(UCOMX)
   NODE_NAME_CASE(CMPM)
   NODE_NAME_CASE(CMPMM)
   NODE_NAME_CASE(STRICT_CMPM)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 93d2b3e65742b2..cf9125dd9c3ccf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -87,6 +87,10 @@ namespace llvm {
     COMI,
     UCOMI,
 
+    // X86 compare with Intrinsics similar to COMI
+    COMX,
+    UCOMX,
+
     /// X86 bit-test instructions.
     BT,
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index b0eb210b687b19..b2c93455c95de2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1225,3 +1225,45 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
 defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
                                               X86Fnmsub, SchedWriteFMA>;
 }
+
+//-------------------------------------------------
+// AVX10  COMEF instructions
+//-------------------------------------------------
+multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
+                             string OpcodeStr,
+                             Domain d,
+                             X86FoldableSchedWrite sched = WriteFComX> {
+  let ExeDomain = d in {
+    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
+                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+    let mayLoad = 1 in {
+        def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
+                            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                            [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
+                            EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+    }
+  }
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
+  defm VCOMXSDZ   :  avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
+                                      "vcomxsd", SSEPackedDouble>,
+                                      TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+  defm VCOMXSHZ   :  avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
+                                      "vcomxsh", SSEPackedSingle>,
+                                      T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+  defm VCOMXSSZ   :  avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
+                                      "vcomxss", SSEPackedSingle>,
+                                      TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+  defm VUCOMXSDZ  :  avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
+                                      "vucomxsd", SSEPackedDouble>,
+                                      TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+  defm VUCOMXSHZ  :  avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
+                                      "vucomxsh", SSEPackedSingle>,
+                                      T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+  defm VUCOMXSSZ  :  avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
+                                      "vucomxss", SSEPackedSingle>,
+                                      TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 59bfd2bcbabc26..fb6920042734a1 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,7 +61,8 @@ def X86hadd    : SDNode<"X86ISD::HADD",      SDTIntBinOp>;
 def X86hsub    : SDNode<"X86ISD::HSUB",      SDTIntBinOp>;
 def X86comi    : SDNode<"X86ISD::COMI",      SDTX86FCmp>;
 def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86FCmp>;
-
+def X86comi512       : SDNode<"X86ISD::COMX",      SDTX86FCmp>;
+def X86ucomi512      : SDNode<"X86ISD::UCOMX",     SDTX86FCmp>;
 def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
                                       SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
 def X86cmps    : SDNode<"X86ISD::FSETCC",    SDTX86Cmps>;
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 8b7a089f0ce872..6f520aa57dcd09 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx  | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx  | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
 
 ;
 ; SSE
@@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse_comieq_ss:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
-; AVX-NEXT:    vcomiss %xmm1, %xmm0
-; AVX-NEXT:    setnp %cl
-; AVX-NEXT:    sete %dl
-; AVX-NEXT:    testb %cl, %dl
-; AVX-NEXT:    cmovnel %esi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %edi, %eax
+; NO-AVX10_2-NEXT:    vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setnp %cl
+; NO-AVX10_2-NEXT:    sete %dl
+; NO-AVX10_2-NEXT:    testb %cl, %dl
+; NO-AVX10_2-NEXT:    cmovnel %esi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp eq i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
 ; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse_comineq_ss:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %esi, %eax
-; AVX-NEXT:    vcomiss %xmm1, %xmm0
-; AVX-NEXT:    cmovnel %edi, %eax
-; AVX-NEXT:    cmovpl %edi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %esi, %eax
+; NO-AVX10_2-NEXT:    vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    cmovnel %edi, %eax
+; NO-AVX10_2-NEXT:    cmovpl %edi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp ne i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse_ucomieq_ss:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
-; AVX-NEXT:    vucomiss %xmm1, %xmm0
-; AVX-NEXT:    setnp %cl
-; AVX-NEXT:    sete %dl
-; AVX-NEXT:    testb %cl, %dl
-; AVX-NEXT:    cmovnel %esi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %edi, %eax
+; NO-AVX10_2-NEXT:    vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setnp %cl
+; NO-AVX10_2-NEXT:    sete %dl
+; NO-AVX10_2-NEXT:    testb %cl, %dl
+; NO-AVX10_2-NEXT:    cmovnel %esi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp eq i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2,
 ; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse_ucomineq_ss:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %esi, %eax
-; AVX-NEXT:    vucomiss %xmm1, %xmm0
-; AVX-NEXT:    cmovnel %edi, %eax
-; AVX-NEXT:    cmovpl %edi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %esi, %eax
+; NO-AVX10_2-NEXT:    vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    cmovnel %edi, %eax
+; NO-AVX10_2-NEXT:    cmovpl %edi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp ne i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse2_comieq_sd:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
-; AVX-NEXT:    vcomisd %xmm1, %xmm0
-; AVX-NEXT:    setnp %cl
-; AVX-NEXT:    sete %dl
-; AVX-NEXT:    testb %cl, %dl
-; AVX-NEXT:    cmovnel %esi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %edi, %eax
+; NO-AVX10_2-NEXT:    vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setnp %cl
+; NO-AVX10_2-NEXT:    sete %dl
+; NO-AVX10_2-NEXT:    testb %cl, %dl
+; NO-AVX10_2-NEXT:    cmovnel %esi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp eq i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
 ; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse2_comineq_sd:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %esi, %eax
-; AVX-NEXT:    vcomisd %xmm1, %xmm0
-; AVX-NEXT:    cmovnel %edi, %eax
-; AVX-NEXT:    cmovpl %edi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %esi, %eax
+; NO-AVX10_2-NEXT:    vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    cmovnel %edi, %eax
+; NO-AVX10_2-NEXT:    cmovpl %edi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp ne i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse2_ucomieq_sd:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
-; AVX-NEXT:    vucomisd %xmm1, %xmm0
-; AVX-NEXT:    setnp %cl
-; AVX-NEXT:    sete %dl
-; AVX-NEXT:    testb %cl, %dl
-; AVX-NEXT:    cmovnel %esi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %edi, %eax
+; NO-AVX10_2-NEXT:    vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setnp %cl
+; NO-AVX10_2-NEXT:    sete %dl
+; NO-AVX10_2-NEXT:    testb %cl, %dl
+; NO-AVX10_2-NEXT:    cmovnel %esi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp eq i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a
 ; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_x86_sse2_ucomineq_sd:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl %esi, %eax
-; AVX-NEXT:    vucomisd %xmm1, %xmm0
-; AVX-NEXT:    cmovnel %edi, %eax
-; AVX-NEXT:    cmovpl %edi, %eax
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; NO-AVX10_2:       # %bb.0:
+; NO-AVX10_2-NEXT:    movl %esi, %eax
+; NO-AVX10_2-NEXT:    vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    cmovnel %edi, %eax
+; NO-AVX10_2-NEXT:    cmovpl %edi, %eax
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; AVX10_2:       # %bb.0:
+; AVX10_2-NEXT:    movl %edi, %eax
+; AVX10_2-NEXT:    vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT:    cmovel %esi, %eax
+; AVX10_2-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp ne i32 %call, 0
   %res = select i1 %cmp, i32 %a2, i32 %a3
@@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:  # %bb.1: # %if.end
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: PR38960_eq:
-; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcomiss %xmm1, %xmm0
-; AVX-NEXT:    setnp %al
-; AVX-NEXT:    sete %cl
-; AVX-NEXT:    testb %al, %cl
-; AVX-NEXT:    jne foo@PLT # TAILCALL
-; AVX-NEXT:  # %bb.1: # %if.end
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: PR38960_eq:
+; NO-AVX10_2:       # %bb.0: # %entry
+; NO-AVX10_2-NEXT:    vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setnp %al
+; NO-AVX10_2-NEXT:    sete %cl
+; NO-AVX10_2-NEXT:    testb %al, %cl
+; NO-AVX10_2-NEXT:    jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT:  # %bb.1: # %if.end
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: PR38960_eq:
+; AVX10_2:       # %bb.0: # %entry
+; AVX10_2-NEXT:    vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    je foo@PLT # TAILCALL
+; AVX10_2-NEXT:  # %bb.1: # %if.end
+; AVX10_2-NEXT:    retq
 entry:
   %call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3
   %cmp = icmp eq i32 %call, 0
@@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:  # %bb.1: # %if.end
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: PR38960_neq:
-; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcomiss %xmm1, %xmm0
-; AVX-NEXT:    setp %al
-; AVX-NEXT:    setne %cl
-; AVX-NEXT:    orb %al, %cl
-; AVX-NEXT:    jne foo@PLT # TAILCALL
-; AVX-NEXT:  # %bb.1: # %if.end
-; AVX-NEXT:    retq
+; NO-AVX10_2-LABEL: PR38960_neq:
+; NO-AVX10_2:       # %bb.0: # %entry
+; NO-AVX10_2-NEXT:    vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT:    setp %al
+; NO-AVX10_2-NEXT:    setne %cl
+; NO-AVX10_2-NEXT:    orb %al, %cl
+; NO-AVX10_2-NEXT:    jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT:  # %bb.1: # %if.end
+; NO-AVX10_2-NEXT:    retq
+;
+; AVX10_2-LABEL: PR38960_neq:
+; AVX10_2:       # %bb.0: # %entry
+; AVX10_2-NEXT:    vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT:    jne foo@PLT # TAILCALL
+; AVX10_2-NEXT:  # %bb.1: # %if.end
+; AVX10_2-NEXT:    retq
 entry:
   %call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3
   %cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
new file mode 100644
index 00000000000000..f762601c9f6221
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
@@ -0,0 +1,174 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vcomxsd %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2f,0xd3
+
+# ATT...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/108063


More information about the llvm-commits mailing list