[llvm] 1a7b69f - add custom operation for strict fpextend/fpround
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 26 16:47:52 PST 2019
Author: Liu, Chen3
Date: 2019-12-27T08:28:33+08:00
New Revision: 1a7b69f5dd32980a7e0b0841a99dc65b2b887203
URL: https://github.com/llvm/llvm-project/commit/1a7b69f5dd32980a7e0b0841a99dc65b2b887203
DIFF: https://github.com/llvm/llvm-project/commit/1a7b69f5dd32980a7e0b0841a99dc65b2b887203.diff
LOG: add custom operation for strict fpextend/fpround
Differential Revision: https://reviews.llvm.org/D71892
Added: (none)
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/vec-strict-128.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed: (none)
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dd73f8a70682..94e43393eab6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1004,7 +1004,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
@@ -20080,12 +20082,13 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
}
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
- // FIXME: Strict fp.
- assert(!IsStrict && "Strict FP not supported yet!");
- return DAG.getNode(X86ISD::VFPEXT, DL, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
- In, DAG.getUNDEF(SVT)));
+ SDValue Res =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+ {Op->getOperand(0), Res});
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
}
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -28938,11 +28941,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
}
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: {
- if (!isTypeLegal(N->getOperand(0).getValueType()))
- return;
- SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (!isTypeLegal(Src.getValueType()))
+ return;
+ SDValue V;
+ if (IsStrict)
+ V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ else
+ V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
return;
}
case ISD::FP_EXTEND: {
@@ -29380,10 +29393,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::STRICT_VFPEXT: return "X86ISD::STRICT_VFPEXT";
case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE";
case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS";
case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::STRICT_VFPROUND: return "X86ISD::STRICT_VFPROUND";
case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS";
@@ -34983,6 +34998,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
case X86ISD::STRICT_CVTTP2UI:
case X86ISD::STRICT_CVTSI2P:
case X86ISD::STRICT_CVTUI2P:
+ case X86ISD::STRICT_VFPROUND:
if (In.getOperand(1).getValueType() == MVT::v2f64 ||
In.getOperand(1).getValueType() == MVT::v2i64)
return N->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 18af57156a38..2396cf65fd87 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -296,10 +296,10 @@ namespace llvm {
VMTRUNC, VMTRUNCUS, VMTRUNCS,
// Vector FP extend.
- VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
+ VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, STRICT_VFPEXT,
// Vector FP round.
- VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
+ VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, STRICT_VFPROUND,
// Masked version of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 36f7980a08fa..09ac2ff30177 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7589,7 +7589,7 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
sched.YMM>, EVEX_V256;
}
@@ -7719,7 +7719,7 @@ let Predicates = [HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
(VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -7728,7 +7728,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
(VCVTPD2PSZ128rm addr:$src)>;
def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -7737,7 +7737,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
+ def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
(VCVTPD2PSZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index a04c493675af..37cba895c370 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -127,11 +127,32 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
SDTCisSameSizeAs<0, 1>]>>;
+
+def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameSizeAs<0, 1>]>,
+ [SDNPHasChain]>;
+
+def X86any_vfpext : PatFrags<(ops node:$src),
+ [(X86vfpext node:$src),
+ (X86strict_vfpext node:$src)]>;
+
def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisOpSmallerThanOp<0, 1>]>,
+ [SDNPHasChain]>;
+
+def X86any_vfpround : PatFrags<(ops node:$src),
+ [(X86vfpround node:$src),
+ (X86strict_vfpround node:$src)]>;
+
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c218acc6e365..c7ecfba5b245 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1617,7 +1617,7 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+ [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1636,7 +1636,7 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+ [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1708,11 +1708,11 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
@@ -1732,11 +1732,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
let Predicates = [HasAVX, NoVLX] in {
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
index 1372ab6b4991..7477f89199e7 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -17,7 +17,9 @@ declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
@@ -399,4 +401,38 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
ret <2 x double> %res
}
+define <2 x double> @f15(<2 x float> %a) #0 {
+; SSE-LABEL: f15:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtps2pd %xmm0, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f15:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+ <2 x float> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %ret
+}
+
+define <2 x float> @f16(<2 x double> %a) #0 {
+; SSE-LABEL: f16:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %ret = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+ <2 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %ret
+}
+
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index c80e1b281ddc..fc92546bd1de 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -5433,20 +5433,12 @@ entry:
define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vcvtpd2psx {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
@@ -5492,17 +5484,9 @@ entry:
define <4 x float> @constrained_vector_fptrunc_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm1
+; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
@@ -5540,20 +5524,12 @@ entry:
define <2 x double> @constrained_vector_fpext_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v2f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
@@ -5597,16 +5573,8 @@ entry:
define <4 x double> @constrained_vector_fpext_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm1, %xmm2
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm1
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v4f32:
More information about the llvm-commits mailing list