[llvm] 62f3403 - [LegalizeTypes] Add widening support for STRICT_FSETCC/FSETCCS
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 13:46:54 PST 2020
Author: Craig Topper
Date: 2020-01-06T13:45:55-08:00
New Revision: 62f3403bfc17906aba555d6100e0136363f6a649
URL: https://github.com/llvm/llvm-project/commit/62f3403bfc17906aba555d6100e0136363f6a649
DIFF: https://github.com/llvm/llvm-project/commit/62f3403bfc17906aba555d6100e0136363f6a649.diff
LOG: [LegalizeTypes] Add widening support for STRICT_FSETCC/FSETCCS
This patch adds widening support, though the widening really just scalarizes the compare, because we don't have a strategy for the extra elements we would need to pad the vector with. An illustrative input is sketched below.
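For illustration, here is a minimal IR sketch of the kind of input that exercises the new path, adapted from the new test added below (the function name is made up): a strict compare on a sub-128-bit <2 x float> vector, which type legalization must widen to a legal vector type on X86.

define <2 x i1> @strict_cmp_v2f32(<2 x float> %f1, <2 x float> %f2) #0 {
  ; v2f32 is narrower than any legal X86 vector type, so the STRICT_FSETCC
  ; result must be widened. Each lane is compared with a scalar strict
  ; compare, the results are reassembled into a vector, and the per-lane
  ; chains are joined by a TokenFactor.
  %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(
             <2 x float> %f1, <2 x float> %f2, metadata !"oeq",
             metadata !"fpexcept.strict") #0
  ret <2 x i1> %cmp
}

declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(<2 x float>, <2 x float>, metadata, metadata)
attributes #0 = { strictfp nounwind }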
Differential Revision: https://reviews.llvm.org/D72193
Added:
llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index faeeee68fd29..f3b376246412 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -805,6 +805,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
@@ -834,6 +835,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index dce2867a3ddb..1cf39a740c5b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3039,6 +3039,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
switch (N->getOpcode()) {
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return WidenVecRes_STRICT_FSETCC(N);
case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_TO_SINT:
@@ -4116,6 +4119,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
WidenVT, InOp1, InOp2, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(1).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ SDValue CC = N->getOperand(3);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+
+ // Fully unroll and reassemble.
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Chains(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, dl, Scalars);
+}
//===----------------------------------------------------------------------===//
// Widen Vector Operand
@@ -4147,6 +4191,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -4590,6 +4636,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
return DAG.getNode(ExtendCode, dl, VT, CC);
}
+SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = GetWidenedVector(N->getOperand(1));
+ SDValue RHS = GetWidenedVector(N->getOperand(2));
+ SDValue CC = N->getOperand(3);
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Unroll into a build vector.
+ SmallVector<SDValue, 8> Scalars(NumElts);
+ SmallVector<SDValue, 8> Chains(NumElts);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(VT, dl, Scalars);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetWidenedVector(N->getOperand(0));
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
new file mode 100644
index 000000000000..6dbac0e8ff6b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
@@ -0,0 +1,308 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE,SSE-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE,SSE-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-64
+
+define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, <2 x float> %f2) #0 {
+; SSE-32-LABEL: test_v2f32_ogt_s:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: andl $-16, %esp
+; SSE-32-NEXT: subl $16, %esp
+; SSE-32-NEXT: movaps 8(%ebp), %xmm3
+; SSE-32-NEXT: xorl %eax, %eax
+; SSE-32-NEXT: comiss %xmm3, %xmm2
+; SSE-32-NEXT: movl $-1, %ecx
+; SSE-32-NEXT: movl $0, %edx
+; SSE-32-NEXT: cmoval %ecx, %edx
+; SSE-32-NEXT: movd %edx, %xmm4
+; SSE-32-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm3, %xmm2
+; SSE-32-NEXT: cmoval %ecx, %eax
+; SSE-32-NEXT: movd %eax, %xmm2
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; SSE-32-NEXT: pand %xmm4, %xmm0
+; SSE-32-NEXT: pandn %xmm1, %xmm4
+; SSE-32-NEXT: por %xmm4, %xmm0
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: test_v2f32_ogt_s:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: xorl %eax, %eax
+; SSE-64-NEXT: comiss %xmm3, %xmm2
+; SSE-64-NEXT: movl $-1, %ecx
+; SSE-64-NEXT: movl $0, %edx
+; SSE-64-NEXT: cmoval %ecx, %edx
+; SSE-64-NEXT: movd %edx, %xmm4
+; SSE-64-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-64-NEXT: comiss %xmm3, %xmm2
+; SSE-64-NEXT: cmoval %ecx, %eax
+; SSE-64-NEXT: movd %eax, %xmm2
+; SSE-64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; SSE-64-NEXT: pand %xmm4, %xmm0
+; SSE-64-NEXT: pandn %xmm1, %xmm4
+; SSE-64-NEXT: por %xmm4, %xmm0
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: test_v2f32_ogt_s:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: andl $-16, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: vmovaps 8(%ebp), %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX-32-NEXT: xorl %eax, %eax
+; AVX-32-NEXT: vcomiss %xmm4, %xmm5
+; AVX-32-NEXT: movl $-1, %ecx
+; AVX-32-NEXT: movl $0, %edx
+; AVX-32-NEXT: cmoval %ecx, %edx
+; AVX-32-NEXT: vcomiss %xmm3, %xmm2
+; AVX-32-NEXT: cmoval %ecx, %eax
+; AVX-32-NEXT: vmovd %eax, %xmm2
+; AVX-32-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
+; AVX-32-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: test_v2f32_ogt_s:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX-64-NEXT: xorl %eax, %eax
+; AVX-64-NEXT: vcomiss %xmm4, %xmm5
+; AVX-64-NEXT: movl $-1, %ecx
+; AVX-64-NEXT: movl $0, %edx
+; AVX-64-NEXT: cmoval %ecx, %edx
+; AVX-64-NEXT: vcomiss %xmm3, %xmm2
+; AVX-64-NEXT: cmoval %ecx, %eax
+; AVX-64-NEXT: vmovd %eax, %xmm2
+; AVX-64-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
+; AVX-64-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-64-NEXT: retq
+;
+; AVX512-32-LABEL: test_v2f32_ogt_s:
+; AVX512-32: # %bb.0:
+; AVX512-32-NEXT: pushl %ebp
+; AVX512-32-NEXT: movl %esp, %ebp
+; AVX512-32-NEXT: andl $-16, %esp
+; AVX512-32-NEXT: subl $16, %esp
+; AVX512-32-NEXT: vmovaps 8(%ebp), %xmm3
+; AVX512-32-NEXT: movw $-3, %ax
+; AVX512-32-NEXT: kmovw %eax, %k0
+; AVX512-32-NEXT: vcomiss %xmm3, %xmm2
+; AVX512-32-NEXT: seta %al
+; AVX512-32-NEXT: andl $1, %eax
+; AVX512-32-NEXT: kmovw %eax, %k1
+; AVX512-32-NEXT: kandw %k0, %k1, %k0
+; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX512-32-NEXT: vcomiss %xmm3, %xmm2
+; AVX512-32-NEXT: seta %al
+; AVX512-32-NEXT: kmovw %eax, %k1
+; AVX512-32-NEXT: kshiftlw $15, %k1, %k1
+; AVX512-32-NEXT: kshiftrw $14, %k1, %k1
+; AVX512-32-NEXT: korw %k1, %k0, %k1
+; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512-32-NEXT: movl %ebp, %esp
+; AVX512-32-NEXT: popl %ebp
+; AVX512-32-NEXT: retl
+;
+; AVX512-64-LABEL: test_v2f32_ogt_s:
+; AVX512-64: # %bb.0:
+; AVX512-64-NEXT: movw $-3, %ax
+; AVX512-64-NEXT: kmovw %eax, %k0
+; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
+; AVX512-64-NEXT: seta %al
+; AVX512-64-NEXT: andl $1, %eax
+; AVX512-64-NEXT: kmovw %eax, %k1
+; AVX512-64-NEXT: kandw %k0, %k1, %k0
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
+; AVX512-64-NEXT: seta %al
+; AVX512-64-NEXT: kmovw %eax, %k1
+; AVX512-64-NEXT: kshiftlw $15, %k1, %k1
+; AVX512-64-NEXT: kshiftrw $14, %k1, %k1
+; AVX512-64-NEXT: korw %k1, %k0, %k1
+; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512-64-NEXT: retq
+ %cond = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(
+ <2 x float> %f1, <2 x float> %f2, metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %res = select <2 x i1> %cond, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %res
+}
+
+define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, <2 x float> %f2) #0 {
+; SSE-32-LABEL: test_v2f32_oeq_q:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: andl $-16, %esp
+; SSE-32-NEXT: subl $16, %esp
+; SSE-32-NEXT: movaps 8(%ebp), %xmm3
+; SSE-32-NEXT: xorl %eax, %eax
+; SSE-32-NEXT: ucomiss %xmm3, %xmm2
+; SSE-32-NEXT: movl $-1, %ecx
+; SSE-32-NEXT: movl $-1, %edx
+; SSE-32-NEXT: cmovnel %eax, %edx
+; SSE-32-NEXT: cmovpl %eax, %edx
+; SSE-32-NEXT: movd %edx, %xmm4
+; SSE-32-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-32-NEXT: ucomiss %xmm3, %xmm2
+; SSE-32-NEXT: cmovnel %eax, %ecx
+; SSE-32-NEXT: cmovpl %eax, %ecx
+; SSE-32-NEXT: movd %ecx, %xmm2
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; SSE-32-NEXT: pand %xmm4, %xmm0
+; SSE-32-NEXT: pandn %xmm1, %xmm4
+; SSE-32-NEXT: por %xmm4, %xmm0
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: test_v2f32_oeq_q:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: xorl %eax, %eax
+; SSE-64-NEXT: ucomiss %xmm3, %xmm2
+; SSE-64-NEXT: movl $-1, %ecx
+; SSE-64-NEXT: movl $-1, %edx
+; SSE-64-NEXT: cmovnel %eax, %edx
+; SSE-64-NEXT: cmovpl %eax, %edx
+; SSE-64-NEXT: movd %edx, %xmm4
+; SSE-64-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-64-NEXT: ucomiss %xmm3, %xmm2
+; SSE-64-NEXT: cmovnel %eax, %ecx
+; SSE-64-NEXT: cmovpl %eax, %ecx
+; SSE-64-NEXT: movd %ecx, %xmm2
+; SSE-64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; SSE-64-NEXT: pand %xmm4, %xmm0
+; SSE-64-NEXT: pandn %xmm1, %xmm4
+; SSE-64-NEXT: por %xmm4, %xmm0
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: test_v2f32_oeq_q:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: andl $-16, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: vmovaps 8(%ebp), %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX-32-NEXT: xorl %eax, %eax
+; AVX-32-NEXT: vucomiss %xmm4, %xmm5
+; AVX-32-NEXT: movl $-1, %ecx
+; AVX-32-NEXT: movl $-1, %edx
+; AVX-32-NEXT: cmovnel %eax, %edx
+; AVX-32-NEXT: cmovpl %eax, %edx
+; AVX-32-NEXT: vucomiss %xmm3, %xmm2
+; AVX-32-NEXT: cmovnel %eax, %ecx
+; AVX-32-NEXT: cmovpl %eax, %ecx
+; AVX-32-NEXT: vmovd %ecx, %xmm2
+; AVX-32-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
+; AVX-32-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: test_v2f32_oeq_q:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX-64-NEXT: xorl %eax, %eax
+; AVX-64-NEXT: vucomiss %xmm4, %xmm5
+; AVX-64-NEXT: movl $-1, %ecx
+; AVX-64-NEXT: movl $-1, %edx
+; AVX-64-NEXT: cmovnel %eax, %edx
+; AVX-64-NEXT: cmovpl %eax, %edx
+; AVX-64-NEXT: vucomiss %xmm3, %xmm2
+; AVX-64-NEXT: cmovnel %eax, %ecx
+; AVX-64-NEXT: cmovpl %eax, %ecx
+; AVX-64-NEXT: vmovd %ecx, %xmm2
+; AVX-64-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
+; AVX-64-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-64-NEXT: retq
+;
+; AVX512-32-LABEL: test_v2f32_oeq_q:
+; AVX512-32: # %bb.0:
+; AVX512-32-NEXT: pushl %ebp
+; AVX512-32-NEXT: movl %esp, %ebp
+; AVX512-32-NEXT: andl $-16, %esp
+; AVX512-32-NEXT: subl $16, %esp
+; AVX512-32-NEXT: vmovaps 8(%ebp), %xmm3
+; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX512-32-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-32-NEXT: setnp %al
+; AVX512-32-NEXT: sete %cl
+; AVX512-32-NEXT: testb %al, %cl
+; AVX512-32-NEXT: setne %al
+; AVX512-32-NEXT: kmovw %eax, %k0
+; AVX512-32-NEXT: kshiftlw $15, %k0, %k0
+; AVX512-32-NEXT: kshiftrw $14, %k0, %k0
+; AVX512-32-NEXT: vucomiss %xmm3, %xmm2
+; AVX512-32-NEXT: setnp %al
+; AVX512-32-NEXT: sete %cl
+; AVX512-32-NEXT: testb %al, %cl
+; AVX512-32-NEXT: setne %al
+; AVX512-32-NEXT: andl $1, %eax
+; AVX512-32-NEXT: kmovw %eax, %k1
+; AVX512-32-NEXT: movw $-3, %ax
+; AVX512-32-NEXT: kmovw %eax, %k2
+; AVX512-32-NEXT: kandw %k2, %k1, %k1
+; AVX512-32-NEXT: korw %k0, %k1, %k1
+; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512-32-NEXT: movl %ebp, %esp
+; AVX512-32-NEXT: popl %ebp
+; AVX512-32-NEXT: retl
+;
+; AVX512-64-LABEL: test_v2f32_oeq_q:
+; AVX512-64: # %bb.0:
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX512-64-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-64-NEXT: setnp %al
+; AVX512-64-NEXT: sete %cl
+; AVX512-64-NEXT: testb %al, %cl
+; AVX512-64-NEXT: setne %al
+; AVX512-64-NEXT: kmovw %eax, %k0
+; AVX512-64-NEXT: kshiftlw $15, %k0, %k0
+; AVX512-64-NEXT: kshiftrw $14, %k0, %k0
+; AVX512-64-NEXT: vucomiss %xmm3, %xmm2
+; AVX512-64-NEXT: setnp %al
+; AVX512-64-NEXT: sete %cl
+; AVX512-64-NEXT: testb %al, %cl
+; AVX512-64-NEXT: setne %al
+; AVX512-64-NEXT: andl $1, %eax
+; AVX512-64-NEXT: kmovw %eax, %k1
+; AVX512-64-NEXT: movw $-3, %ax
+; AVX512-64-NEXT: kmovw %eax, %k2
+; AVX512-64-NEXT: kandw %k2, %k1, %k1
+; AVX512-64-NEXT: korw %k0, %k1, %k1
+; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512-64-NEXT: retq
+ %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(
+ <2 x float> %f1, <2 x float> %f2, metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select <2 x i1> %cond, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %res
+}
+
+attributes #0 = { strictfp nounwind }
+
+declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(<2 x float>, <2 x float>, metadata, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float>, <2 x float>, metadata, metadata)