[llvm] 253ed52 - DAG: Use poison for some vector result widening (#168290)
via llvm-commits
llvm-commits@lists.llvm.org
Wed Nov 19 13:49:47 PST 2025
Author: Matt Arsenault
Date: 2025-11-19T16:49:43-05:00
New Revision: 253ed524365e20309e0f615415c9433bd9bda44d
URL: https://github.com/llvm/llvm-project/commit/253ed524365e20309e0f615415c9433bd9bda44d
DIFF: https://github.com/llvm/llvm-project/commit/253ed524365e20309e0f615415c9433bd9bda44d.diff
LOG: DAG: Use poison for some vector result widening (#168290)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/X86/matrix-multiply.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
}
while (Ops.size() != WidenNumElts)
- Ops.push_back(DAG.getUNDEF(WidenSVT));
+ Ops.push_back(DAG.getPOISON(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenSize % InSize == 0) {
- SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
Ops[0] = InOp;
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(InOp, Ops);
Ops.append(WidenSize / InScalarSize - Ops.size(),
- DAG.getUNDEF(InVT.getVectorElementType()));
+ DAG.getPOISON(InVT.getVectorElementType()));
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
}
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenNumElts / NumInElts;
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue UndefVal = DAG.getPOISON(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned j = 0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
for (; I < WidenNumElts / GCD; ++I)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
for (; i < WidenNumElts / GCD; ++i)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
// Fully unroll and reassemble.
- SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpkhi z2.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpsxws 0, 0
; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpsxws 0, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5558,11 +5558,11 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpsxws 0, 1
-; PC64LE-NEXT: xscvdpsxws 1, 2
+; PC64LE-NEXT: xscvdpsxws 0, 2
+; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpsxws 0, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 2
+; PC64LE9-NEXT: xscvdpsxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpsxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5783,16 +5783,16 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpuxws 0, 0
; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpuxws 0, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6054,11 +6054,11 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpuxws 0, 1
-; PC64LE-NEXT: xscvdpuxws 1, 2
+; PC64LE-NEXT: xscvdpuxws 0, 2
+; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI121_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpuxws 0, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 2
+; PC64LE9-NEXT: xscvdpuxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpuxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6269,33 +6269,33 @@ entry:
define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xsrsp 0, 1
-; PC64LE-NEXT: xsrsp 1, 2
+; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xsrsp 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xsrsp 1, 1
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 35, 0
-; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 36, 0
+; PC64LE-NEXT: xxmrghw 34, 2, 1
+; PC64LE-NEXT: lxvd2x 1, 0, 3
+; PC64LE-NEXT: xxswapd 35, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xsrsp 0, 1
-; PC64LE9-NEXT: xsrsp 1, 2
+; PC64LE9-NEXT: xsrsp 0, 3
+; PC64LE9-NEXT: xsrsp 2, 2
+; PC64LE9-NEXT: xsrsp 1, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xsrsp 1, 3
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 2, 1
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxswapd 0, 34
-; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT: xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT: xxswapd 1, 34
; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: mtfprwa 0, 3
; PC64LE-NEXT: mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-NEXT: xscvsxdsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mfvsrwz 3, 34
; PC64LE-NEXT: xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: li 3, 0
+; PC64LE9-NEXT: li 3, 4
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: mtfprwa 0, 3
-; PC64LE9-NEXT: li 3, 4
+; PC64LE9-NEXT: li 3, 0
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: mfvsrwz 3, 34
; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT: mtfprwa 2, 3
+; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: xscvsxdsp 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: xscvdpspn 34, 2
+; PC64LE9-NEXT: xxmrghw 35, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mfvsrwz 3, 34
-; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -7225,15 +7225,15 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mtfprd 0, 3
-; PC64LE-NEXT: mtfprd 1, 4
+; PC64LE-NEXT: mtfprd 0, 4
+; PC64LE-NEXT: mtfprd 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI163_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI163_0@toc@l
; PC64LE-NEXT: xscvsxdsp 0, 0
; PC64LE-NEXT: xscvsxdsp 1, 1
-; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xxmrghw 34, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mtfprd 0, 3
; PC64LE9-NEXT: mtfprd 1, 4
+; PC64LE9-NEXT: mtfprd 2, 3
+; PC64LE9-NEXT: mtfprd 0, 5
; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: xscvsxdsp 1, 1
+; PC64LE9-NEXT: xscvsxdsp 2, 2
+; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: mtfprd 1, 5
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 1, 2
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float>
@@ -7709,8 +7709,8 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxswapd 0, 34
-; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT: xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT: xxswapd 1, 34
; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: mtfprwz 0, 3
; PC64LE-NEXT: mffprwz 3, 1
@@ -7721,7 +7721,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-NEXT: xscvuxdsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mfvsrwz 3, 34
; PC64LE-NEXT: xxswapd 36, 0
@@ -7733,24 +7733,24 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: li 3, 0
+; PC64LE9-NEXT: li 3, 4
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: li 3, 4
+; PC64LE9-NEXT: li 3, 0
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: xscvuxdsp 0, 0
; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI179_0@toc@ha
+; PC64LE9-NEXT: mfvsrwz 3, 34
; PC64LE9-NEXT: xscvuxdsp 1, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l
+; PC64LE9-NEXT: mtfprwz 2, 3
+; PC64LE9-NEXT: addis 3, 2, .LCPI179_0@toc@ha
+; PC64LE9-NEXT: xscvuxdsp 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: xscvdpspn 34, 2
+; PC64LE9-NEXT: xxmrghw 35, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mfvsrwz 3, 34
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: xscvuxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -7792,15 +7792,15 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mtfprd 0, 3
-; PC64LE-NEXT: mtfprd 1, 4
+; PC64LE-NEXT: mtfprd 0, 4
+; PC64LE-NEXT: mtfprd 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI181_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI181_0@toc@l
; PC64LE-NEXT: xscvuxdsp 0, 0
; PC64LE-NEXT: xscvuxdsp 1, 1
-; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xxmrghw 34, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: mtfprd 0, 5
@@ -7811,20 +7811,20 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mtfprd 0, 3
; PC64LE9-NEXT: mtfprd 1, 4
+; PC64LE9-NEXT: mtfprd 2, 3
+; PC64LE9-NEXT: mtfprd 0, 5
; PC64LE9-NEXT: addis 3, 2, .LCPI181_0@toc@ha
-; PC64LE9-NEXT: xscvuxdsp 0, 0
; PC64LE9-NEXT: xscvuxdsp 1, 1
+; PC64LE9-NEXT: xscvuxdsp 2, 2
+; PC64LE9-NEXT: xscvuxdsp 0, 0
; PC64LE9-NEXT: addi 3, 3, .LCPI181_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: mtfprd 1, 5
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvuxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 1, 2
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float>
diff --git a/llvm/test/CodeGen/X86/matrix-multiply.ll b/llvm/test/CodeGen/X86/matrix-multiply.ll
index 388d8528a2b80..f38b769fe4987 100644
--- a/llvm/test/CodeGen/X86/matrix-multiply.ll
+++ b/llvm/test/CodeGen/X86/matrix-multiply.ll
@@ -368,46 +368,47 @@ define <9 x float> @test_mul3x3_f32(<9 x float> %a0, <9 x float> %a1) nounwind {
; AVX512F-NEXT: vaddps %xmm4, %xmm9, %xmm9
; AVX512F-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX512F-NEXT: vmulss %xmm1, %xmm4, %xmm10
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; AVX512F-NEXT: vmulss %xmm6, %xmm5, %xmm6
-; AVX512F-NEXT: vaddss %xmm6, %xmm10, %xmm6
-; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm10
-; AVX512F-NEXT: vmulss %xmm8, %xmm10, %xmm8
-; AVX512F-NEXT: vaddss %xmm6, %xmm8, %xmm6
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm6 = xmm9[0,1],xmm6[0],xmm9[3]
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm11 = xmm5[1,1,3,3]
+; AVX512F-NEXT: vmulss %xmm6, %xmm11, %xmm5
+; AVX512F-NEXT: vaddss %xmm5, %xmm10, %xmm5
+; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm6
+; AVX512F-NEXT: vmulss %xmm6, %xmm8, %xmm8
+; AVX512F-NEXT: vaddss %xmm5, %xmm8, %xmm5
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm5 = xmm9[0,1],xmm5[0],xmm9[3]
; AVX512F-NEXT: vmulps %xmm7, %xmm0, %xmm8
; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm9
-; AVX512F-NEXT: vmovsldup {{.*#+}} xmm11 = xmm9[0,0,2,2]
-; AVX512F-NEXT: vmulps %xmm2, %xmm11, %xmm11
-; AVX512F-NEXT: vaddps %xmm11, %xmm8, %xmm8
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm11 = xmm9[1,1,3,3]
-; AVX512F-NEXT: vmulps %xmm3, %xmm11, %xmm12
+; AVX512F-NEXT: vmovsldup {{.*#+}} xmm10 = xmm9[0,0,2,2]
+; AVX512F-NEXT: vmulps %xmm2, %xmm10, %xmm10
+; AVX512F-NEXT: vaddps %xmm10, %xmm8, %xmm8
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm9[1,1,3,3]
+; AVX512F-NEXT: vmulps %xmm3, %xmm10, %xmm12
; AVX512F-NEXT: vaddps %xmm12, %xmm8, %xmm8
; AVX512F-NEXT: vmulss %xmm7, %xmm4, %xmm7
-; AVX512F-NEXT: vmulss %xmm5, %xmm9, %xmm12
+; AVX512F-NEXT: vmulss %xmm9, %xmm11, %xmm12
; AVX512F-NEXT: vaddss %xmm7, %xmm12, %xmm7
-; AVX512F-NEXT: vmulss %xmm11, %xmm10, %xmm11
-; AVX512F-NEXT: vaddss %xmm7, %xmm11, %xmm7
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm7 = xmm8[0,1],xmm7[0],xmm8[3]
-; AVX512F-NEXT: vshufps {{.*#+}} xmm8 = xmm9[3,3,3,3]
-; AVX512F-NEXT: vshufpd {{.*#+}} xmm11 = xmm9[1,0]
+; AVX512F-NEXT: vmulss %xmm6, %xmm10, %xmm10
+; AVX512F-NEXT: vaddss %xmm7, %xmm10, %xmm7
+; AVX512F-NEXT: vshufps {{.*#+}} xmm10 = xmm9[3,3,3,3]
+; AVX512F-NEXT: vshufpd {{.*#+}} xmm12 = xmm9[1,0]
; AVX512F-NEXT: vshufps {{.*#+}} xmm9 = xmm9[2,2,2,2]
; AVX512F-NEXT: vmulps %xmm0, %xmm9, %xmm0
-; AVX512F-NEXT: vmulps %xmm2, %xmm8, %xmm2
+; AVX512F-NEXT: vmulps %xmm2, %xmm10, %xmm2
; AVX512F-NEXT: vaddps %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; AVX512F-NEXT: vbroadcastss %xmm1, %xmm2
; AVX512F-NEXT: vmulps %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vaddps %xmm2, %xmm0, %xmm0
-; AVX512F-NEXT: vmulss %xmm4, %xmm11, %xmm2
-; AVX512F-NEXT: vmulss %xmm5, %xmm8, %xmm3
+; AVX512F-NEXT: vmulss %xmm4, %xmm12, %xmm2
+; AVX512F-NEXT: vmulss %xmm10, %xmm11, %xmm3
; AVX512F-NEXT: vaddss %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vmulss %xmm1, %xmm10, %xmm1
+; AVX512F-NEXT: vmulss %xmm1, %xmm6, %xmm1
; AVX512F-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1],xmm1[0],xmm0[3]
-; AVX512F-NEXT: vinsertf32x4 $1, %xmm7, %zmm6, %zmm2
-; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [0,1,2,4,5,6,16,17,18,0,0,0,0,0,0,0]
-; AVX512F-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm8[1,1,3,3]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[2,3]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0,1,2],xmm8[0]
+; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX512F-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mul3x3_f32:
@@ -447,26 +448,27 @@ define <9 x float> @test_mul3x3_f32(<9 x float> %a0, <9 x float> %a1) nounwind {
; AVX512VL-NEXT: vaddss %xmm7, %xmm12, %xmm7
; AVX512VL-NEXT: vmulss %xmm11, %xmm10, %xmm11
; AVX512VL-NEXT: vaddss %xmm7, %xmm11, %xmm7
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm7[0],xmm5[3]
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm7 = xmm8[3,3,3,3]
-; AVX512VL-NEXT: vshufpd {{.*#+}} xmm11 = xmm8[1,0]
+; AVX512VL-NEXT: vshufps {{.*#+}} xmm11 = xmm8[3,3,3,3]
+; AVX512VL-NEXT: vshufpd {{.*#+}} xmm12 = xmm8[1,0]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm8 = xmm8[2,2,2,2]
; AVX512VL-NEXT: vmulps %xmm0, %xmm8, %xmm0
-; AVX512VL-NEXT: vmulps %xmm7, %xmm2, %xmm2
+; AVX512VL-NEXT: vmulps %xmm2, %xmm11, %xmm2
; AVX512VL-NEXT: vaddps %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; AVX512VL-NEXT: vbroadcastss %xmm1, %xmm2
; AVX512VL-NEXT: vmulps %xmm2, %xmm6, %xmm2
; AVX512VL-NEXT: vaddps %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vmulss %xmm11, %xmm9, %xmm2
-; AVX512VL-NEXT: vmulss %xmm7, %xmm4, %xmm4
+; AVX512VL-NEXT: vmulss %xmm12, %xmm9, %xmm2
+; AVX512VL-NEXT: vmulss %xmm4, %xmm11, %xmm4
; AVX512VL-NEXT: vaddss %xmm4, %xmm2, %xmm2
; AVX512VL-NEXT: vmulss %xmm1, %xmm10, %xmm1
; AVX512VL-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1],xmm1[0],xmm0[3]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm5, %zmm3, %zmm2
-; AVX512VL-NEXT: vpmovsxbd {{.*#+}} zmm0 = [0,1,2,4,5,6,16,17,18,0,0,0,0,0,0,0]
-; AVX512VL-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
+; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[2,3]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm5[0]
+; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX512VL-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
entry:
%block = shufflevector <9 x float> %a0, <9 x float> poison, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 4a5b4277c3cca..88d3ad181d766 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -4143,11 +4143,11 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; CHECK-NEXT: movd %edx, %xmm1
+; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
@@ -4155,10 +4155,10 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; AVX-NEXT: vmovd %edx, %xmm0
+; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -4256,11 +4256,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 {
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm0
+; AVX1-NEXT: vmovq %rcx, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -4268,11 +4268,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 {
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -4382,11 +4382,11 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; CHECK-NEXT: movd %edx, %xmm1
+; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
@@ -4394,10 +4394,10 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; AVX-NEXT: vmovd %edx, %xmm0
+; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -4498,11 +4498,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 {
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm0
+; AVX1-NEXT: vmovq %rcx, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -4510,11 +4510,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 {
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -4645,11 +4645,11 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; CHECK-NEXT: movd %edx, %xmm1
+; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
@@ -4658,19 +4658,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
-; AVX1-NEXT: vmovd %ecx, %xmm0
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
+; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX1-NEXT: vmovd %edx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX512-NEXT: vmovd %eax, %xmm0
-; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; AVX512-NEXT: vmovd %edx, %xmm0
+; AVX512-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
@@ -4911,7 +4911,7 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm2, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
@@ -4921,51 +4921,51 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; AVX1-NEXT: vmovaps %xmm0, %xmm3
; AVX1-NEXT: .LBB123_2: # %entry
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: setbe %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
-; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
-; AVX1-NEXT: vcomiss %xmm3, %xmm0
-; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vcvttss2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT: ja .LBB123_4
; AVX1-NEXT: # %bb.3: # %entry
-; AVX1-NEXT: vmovaps %xmm0, %xmm4
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
; AVX1-NEXT: .LBB123_4: # %entry
-; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttss2si %xmm2, %rdx
; AVX1-NEXT: setbe %cl
; AVX1-NEXT: movzbl %cl, %ecx
; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm3
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
-; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: xorq %rdx, %rcx
+; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
; AVX1-NEXT: ja .LBB123_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: .LBB123_6: # %entry
-; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
-; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: setbe %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vcvttss2si %xmm0, %rdx
+; AVX1-NEXT: setbe %sil
+; AVX1-NEXT: movzbl %sil, %esi
+; AVX1-NEXT: shlq $63, %rsi
+; AVX1-NEXT: xorq %rdx, %rsi
+; AVX1-NEXT: vmovq %rsi, %xmm0
+; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -5194,11 +5194,11 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; CHECK-NEXT: movd %edx, %xmm1
+; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
@@ -5207,19 +5207,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
-; AVX1-NEXT: vmovd %ecx, %xmm0
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
+; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX1-NEXT: vmovd %edx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX512-NEXT: vmovd %eax, %xmm0
-; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
-; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx
+; AVX512-NEXT: vmovd %edx, %xmm0
+; AVX512-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
@@ -5466,7 +5466,7 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
+; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0]
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0]
; AVX1-NEXT: vcomisd %xmm2, %xmm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -5476,51 +5476,51 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; AVX1-NEXT: vmovapd %xmm0, %xmm3
; AVX1-NEXT: .LBB131_2: # %entry
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: setbe %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
-; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
-; AVX1-NEXT: vcomisd %xmm3, %xmm0
-; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2100000000000001E+1,0.0E+0]
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: ja .LBB131_4
; AVX1-NEXT: # %bb.3: # %entry
-; AVX1-NEXT: vmovapd %xmm0, %xmm4
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
; AVX1-NEXT: .LBB131_4: # %entry
-; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
; AVX1-NEXT: setbe %cl
; AVX1-NEXT: movzbl %cl, %ecx
; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm3
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
-; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: xorq %rdx, %rcx
+; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
; AVX1-NEXT: ja .LBB131_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm1
; AVX1-NEXT: .LBB131_6: # %entry
-; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: setbe %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX1-NEXT: setbe %sil
+; AVX1-NEXT: movzbl %sil, %esi
+; AVX1-NEXT: shlq $63, %rsi
+; AVX1-NEXT: xorq %rdx, %rsi
+; AVX1-NEXT: vmovq %rsi, %xmm0
+; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
+; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -5731,26 +5731,26 @@ entry:
define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0]
-; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: cvtsd2ss %xmm2, %xmm2
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0]
-; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vcvtsd2ss %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -5834,14 +5834,14 @@ define <3 x double> @constrained_vector_fpext_v3f32() #0 {
;
; AVX-LABEL: constrained_vector_fpext_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
-; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
%result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
@@ -6702,14 +6702,14 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
;
; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vextractps $2, %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm1
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm2
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%result = call <3 x double>
@@ -6722,31 +6722,31 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %eax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; CHECK-NEXT: movd %xmm2, %eax
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2ss %eax, %xmm2
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %eax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vextractps $2, %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
-; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: retq
entry:
%result = call <3 x float>
@@ -6769,28 +6769,28 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
;
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovq %xmm1, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
entry:
%result = call <3 x double>
@@ -6803,39 +6803,38 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: cvtsi2ss %rsi, %xmm2
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovq %xmm1, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
@@ -7415,26 +7414,26 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
;
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $2, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $2, %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm2
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
entry:
%result = call <3 x double>
@@ -7447,43 +7446,43 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; CHECK-NEXT: movd %xmm2, %eax
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $2, %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $2, %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm1
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512-NEXT: retq
entry:
%result = call <3 x float>
@@ -7539,7 +7538,8 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
;
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: movl %eax, %edx
@@ -7565,9 +7565,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: .LBB183_4: # %entry
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: movl %eax, %edx
@@ -7580,20 +7578,21 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: .LBB183_6: # %entry
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovq %xmm1, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
entry:
%result = call <3 x double>
@@ -7606,13 +7605,13 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: orq %rax, %rcx
-; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: cmovnsq %rsi, %rcx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovnsq %rdx, %rcx
; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
; CHECK-NEXT: jns .LBB184_2
; CHECK-NEXT: # %bb.1:
@@ -7630,26 +7629,26 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm0, %xmm0
; CHECK-NEXT: .LBB184_4: # %entry
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: orq %rax, %rcx
-; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovnsq %rdx, %rcx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovnsq %rsi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm2
; CHECK-NEXT: jns .LBB184_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: addss %xmm2, %xmm2
; CHECK-NEXT: .LBB184_6: # %entry
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: movl %eax, %edx
@@ -7675,9 +7674,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
; AVX1-NEXT: .LBB184_4: # %entry
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: movl %eax, %edx
@@ -7690,21 +7687,22 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: .LBB184_6: # %entry
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovq %xmm1, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
More information about the llvm-commits
mailing list