[llvm] [SDAG][X86] Add method to scalarize `STRICT_FSETCC` (PR #154486)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 00:34:21 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-x86
Author: Abhishek Kaushik (abhishek-kaushik22)
<details>
<summary>Changes</summary>
Fixes #<!-- -->154485
---
Patch is 50.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154486.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+41)
- (added) llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll (+1298)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 33fa3012618b3..65fd863e55ac9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -909,6 +909,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
+ SDValue ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a0fd855832a33 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -789,6 +789,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC:
Res = ScalarizeVecOp_VSETCC(N);
break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ Res = ScalarizeVecOp_VSTRICT_FSETCC(N, OpNo);
+ break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -985,6 +989,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
}
+// Similar to ScalarizeVecOp_VSETCC, with added logic to update chains.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Wrong operand for scalarization!");
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(1).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue Ch = N->getOperand(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ SDValue RHS = GetScalarizedVector(N->getOperand(2));
+ SDValue CC = N->getOperand(3);
+
+ EVT OpVT = N->getOperand(1).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ SDValue Res = DAG.getNode(N->getOpcode(), DL, {MVT::i1, MVT::Other},
+ {Ch, LHS, RHS, CC});
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements, since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
diff --git a/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll b/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll
new file mode 100644
index 0000000000000..f34890c053517
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll
@@ -0,0 +1,1298 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
+
+define <4 x i1> @test_oeq_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_oeq_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setnp %cl
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: andb %cl, %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setnp %cl
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: andb %cl, %dl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: setnp %cl
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: andb %cl, %sil
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: kmovw %ecx, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ogt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ogt_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ogt", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_oge_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_oge_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oge", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_olt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_olt_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: seta %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"olt", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ole_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ole_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ole", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_one_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_one_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setne %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"one", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ord_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ord_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setnp %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setnp %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ord", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ueq_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ueq_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ueq", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ugt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ugt_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setb %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setb %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k0, %k1, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ugt", metadata !"fpexcept.strict")
+ ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_uge_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_uge_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setbe %dl
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %ecx, %k1
+; CHECK-NEXT: kshiftlw $1, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: kshiftlw $1, %k2, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kshiftlw $14, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: k...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/154486
More information about the llvm-commits
mailing list