[llvm-branch-commits] [llvm] DAG: Use poison for some vector result widening (PR #168290)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Nov 16 13:41:27 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Patch is 76.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168290.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll (-7)
- (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+133-133)
- (modified) llvm/test/CodeGen/X86/half.ll (+64-69)
- (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+38-36)
- (modified) llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (+216-218)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
}
while (Ops.size() != WidenNumElts)
- Ops.push_back(DAG.getUNDEF(WidenSVT));
+ Ops.push_back(DAG.getPOISON(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenSize % InSize == 0) {
- SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
Ops[0] = InOp;
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(InOp, Ops);
Ops.append(WidenSize / InScalarSize - Ops.size(),
- DAG.getUNDEF(InVT.getVectorElementType()));
+ DAG.getPOISON(InVT.getVectorElementType()));
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
}
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenNumElts / NumInElts;
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue UndefVal = DAG.getPOISON(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned j = 0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
for (; I < WidenNumElts / GCD; ++I)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
for (; i < WidenNumElts / GCD; ++i)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
// Fully unroll and reassemble.
- SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpkhi z2.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpsxws 0, 0
; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpsxws 0, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5558,11 +5558,11 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpsxws 0, 1
-; PC64LE-NEXT: xscvdpsxws 1, 2
+; PC64LE-NEXT: xscvdpsxws 0, 2
+; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpsxws 0, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 2
+; PC64LE9-NEXT: xscvdpsxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpsxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5783,16 +5783,16 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpuxws 0, 0
; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpuxws 0, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6054,11 +6054,11 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpuxws 0, 1
-; PC64LE-NEXT: xscvdpuxws 1, 2
+; PC64LE-NEXT: xscvdpuxws 0, 2
+; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI121_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpuxws 0, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 2
+; PC64LE9-NEXT: xscvdpuxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpuxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6269,33 +6269,33 @@ entry:
define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xsrsp 0, 1
-; PC64LE-NEXT: xsrsp 1, 2
+; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xsrsp 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xsrsp 1, 1
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 35, 0
-; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 36, 0
+; PC64LE-NEXT: xxmrghw 34, 2, 1
+; PC64LE-NEXT: lxvd2x 1, 0, 3
+; PC64LE-NEXT: xxswapd 35, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xsrsp 0, 1
-; PC64LE9-NEXT: xsrsp 1, 2
+; PC64LE9-NEXT: xsrsp 0, 3
+; PC64LE9-NEXT: xsrsp 2, 2
+; PC64LE9-NEXT: xsrsp 1, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xsrsp 1, 3
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 2, 1
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxswapd 0, 34
-; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT: xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT: xxswapd 1, 34
; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: mtfprwa 0, 3
; PC64LE-NEXT: mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-NEXT: xscvsxdsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mfvsrwz 3, 34
; PC64LE-NEXT: xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: li 3, 0
+; PC64LE9-NEXT: li 3, 4
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: mtfprwa 0, 3
-; PC64LE9-NEXT: li 3, 4
+; PC64LE9-NEXT: li 3, 0
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: mfvsrwz 3, 34
; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT: mtfprwa 2, 3
+; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: xscvsxdsp 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: xscvdpspn 34, 2
+; PC64LE9-NEXT: xxmrghw 35, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mfvsrwz 3, 34
-; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -7225,15 +7225,15 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mtfprd 0, 3
-; PC64LE-NEXT: mtfprd 1, 4
+; PC64LE-NEXT: mtfprd 0, 4
+; PC64LE-NEXT: mtfprd 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI163_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI163_0@toc@l
; PC64LE-NEXT: xscvsxdsp 0, 0
; PC64LE-NEXT: xscvsxdsp 1, 1
-; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xxmrghw 34, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mtfprd 0, 3
; PC64LE9-NEXT: mtfprd 1, 4
+; PC64LE9-NEXT: mtfprd 2, 3
+; PC64LE9-NEXT: mtfprd 0, 5
; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: xscvsxdsp 1, 1
+; PC64LE9-NEXT: xscvsxdsp 2, 2
+; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: mtfprd 1, 5
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/168290
More information about the llvm-branch-commits
mailing list