[llvm] [PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types. (PR #80784)
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 13:22:30 PST 2024
https://github.com/amy-kwan updated https://github.com/llvm/llvm-project/pull/80784
>From 72db501b5ed30ee6c20600f91c471b5829f0a4fd Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 2 Feb 2024 10:27:28 -0600
Subject: [PATCH 1/6] [PowerPC] Fix vector_shuffle combines when inputs are
scalar_to_vector of differing types.
This patch fixes the combines for vector_shuffles when either or both of its
left and right hand side inputs are scalar_to_vector nodes.
Previously, when both the left and right hand side inputs were scalar_to_vector
nodes, the combine could not handle the situation, as the shuffle mask was
updated incorrectly. As a temporary workaround, this combine was simply
disabled and not performed.
This patch not only aims to resolve the previous issue of incorrect shuffle
mask adjustments, but also updates any test cases that are affected by this
change.
Patch migrated from https://reviews.llvm.org/D130487.
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 +++--
.../PowerPC/p8-scalar_vector_conversions.ll | 6 +-
.../PowerPC/v16i8_scalar_to_vector_shuffle.ll | 459 +++++++++---------
.../PowerPC/v2i64_scalar_to_vector_shuffle.ll | 434 +++++++----------
.../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 402 ++++++++-------
.../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 335 ++++++-------
6 files changed, 811 insertions(+), 934 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ec4f8f4be425ed..4ffa47dc7290a9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15687,16 +15687,18 @@ static SDValue isScalarToVec(SDValue Op) {
// On little endian, that's just the corresponding element in the other
// half of the vector. On big endian, it is in the same half but right
// justified rather than left justified in that half.
-static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
- int LHSMaxIdx, int RHSMinIdx,
- int RHSMaxIdx, int HalfVec,
- unsigned ValidLaneWidth,
- const PPCSubtarget &Subtarget) {
+static void fixupShuffleMaskForPermutedSToV(
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
for (int i = 0, e = ShuffV.size(); i < e; i++) {
int Idx = ShuffV[i];
- if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
ShuffV[i] +=
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
+ ShuffV[i] +=
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
}
}
@@ -15735,6 +15737,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
OrigSToV.getOperand(0));
}
+static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
+ int HalfVec, int LHSLastElementDefined,
+ int RHSLastElementDefined) {
+ for (int i : seq<int>(0, ShuffV.size())) {
+ int Index = ShuffV[i];
+ if (Index < 0) // Skip explicitly undefined mask indices.
+ continue;
+ // Handle first input vector of the vector_shuffle.
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
+ (Index > LHSLastElementDefined))
+ return false;
+ // Handle second input vector of the vector_shuffle.
+ if ((RHSLastElementDefined >= 0) &&
+ (Index > HalfVec + RHSLastElementDefined))
+ return false;
+ }
+ return true;
+}
+
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
@@ -15782,36 +15803,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
SDValue SToVLHS = isScalarToVec(LHS);
SDValue SToVRHS = isScalarToVec(RHS);
if (SToVLHS || SToVRHS) {
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
- // same type and have differing element sizes, then do not perform
- // the following transformation. The current transformation for
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
- // element size. This will be updated in the future to account for
- // differing sizes of the LHS and RHS.
- if (SToVLHS && SToVRHS &&
- (SToVLHS.getValueType().getScalarSizeInBits() !=
- SToVRHS.getValueType().getScalarSizeInBits()))
- return Res;
-
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
- : SToVRHS.getValueType().getVectorNumElements();
- int NumEltsOut = ShuffV.size();
+ EVT VT = SVN->getValueType(0);
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
+ int ShuffleNumElts = ShuffV.size();
+ int HalfVec = ShuffleNumElts / 2;
// The width of the "valid lane" (i.e. the lane that contains the value that
// is vectorized) needs to be expressed in terms of the number of elements
// of the shuffle. It is thereby the ratio of the values before and after
- // any bitcast.
- unsigned ValidLaneWidth =
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
- LHS.getValueType().getScalarSizeInBits()
- : SToVRHS.getValueType().getScalarSizeInBits() /
- RHS.getValueType().getScalarSizeInBits();
+ // any bitcast, which will be set later on if the LHS or RHS are
+ // SCALAR_TO_VECTOR nodes.
+ unsigned LHSNumValidElts = HalfVec;
+ unsigned RHSNumValidElts = HalfVec;
// Initially assume that neither input is permuted. These will be adjusted
- // accordingly if either input is.
- int LHSMaxIdx = -1;
- int RHSMinIdx = -1;
- int RHSMaxIdx = -1;
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
+ // accordingly if either input is. Note, that -1 means that all elements
+ // are undefined.
+ int LHSFirstElt = 0;
+ int RHSFirstElt = ShuffleNumElts;
+ int LHSLastElt = -1;
+ int RHSLastElt = -1;
// Get the permuted scalar to vector nodes for the source(s) that come from
// ISD::SCALAR_TO_VECTOR.
@@ -15820,33 +15830,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
// the value into element zero. Since scalar size of LHS and RHS may differ
// after isScalarToVec, this should be checked using their own sizes.
if (SToVLHS) {
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
+ if (!IsLittleEndian && LHSScalarSize >= 64)
return Res;
// Set up the values for the shuffle vector fixup.
- LHSMaxIdx = NumEltsOut / NumEltsIn;
+ LHSNumValidElts =
+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
+ // The last element that comes from the LHS. For example:
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+ // The last element that comes from the LHS is actually 0, not 3
+ // because elements 1 and higher of a scalar_to_vector are undefined.
+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
if (SToVLHS.getValueType() != LHS.getValueType())
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
LHS = SToVLHS;
}
if (SToVRHS) {
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
+ if (!IsLittleEndian && RHSScalarSize >= 64)
return Res;
- RHSMinIdx = NumEltsOut;
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
+ RHSNumValidElts =
+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
+ // The last element that comes from the RHS. For example:
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+ // The last element that comes from the RHS is actually 5, not 7
+ // because elements 1 and higher of a scalar_to_vector are undefined.
+ // It is also not 4 because the original scalar_to_vector is wider and
+ // actually contains two i32 elements.
+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
if (SToVRHS.getValueType() != RHS.getValueType())
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
RHS = SToVRHS;
}
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
+ return Res;
+
// Fix up the shuffle mask to reflect where the desired element actually is.
// The minimum and maximum indices that correspond to element zero for both
// the LHS and RHS are computed and will control which shuffle mask entries
// are to be changed. For example, if the RHS is permuted, any shuffle mask
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
- HalfVec, ValidLaneWidth, Subtarget);
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
+ fixupShuffleMaskForPermutedSToV(
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
// We may have simplified away the shuffle. We won't be able to do anything
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 8f12b182283f53..656bc3661178ac 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
;
; CHECK-LE-LABEL: buildi2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mtfprd f0, r4
+; CHECK-LE-NEXT: mtfprwz f0, r4
; CHECK-LE-NEXT: mtfprd f1, r3
-; CHECK-LE-NEXT: xxswapd vs0, vs0
-; CHECK-LE-NEXT: xxswapd v2, vs1
-; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-NEXT: blr
;
; CHECK-AIX-LABEL: buildi2:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 31d0960e19f4ef..3ab49cd39f8d80 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -266,56 +266,54 @@ entry:
define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r4
+; CHECK-LE-P8-NEXT: mtvsrd v3, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r4
+; CHECK-LE-P9-NEXT: mtvsrd v3, r3
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r4, r4, 56
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtvsrd v2, r4
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0 at toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r4, r4, 56
-; CHECK-BE-P9-NEXT: sldi r3, r3, 48
-; CHECK-BE-P9-NEXT: mtvsrd v2, r4
-; CHECK-BE-P9-NEXT: mtvsrd v3, r3
-; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-P9-NEXT: mtfprwz f0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0 at toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -348,56 +346,54 @@ entry:
define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: mtvsrd v2, r4
+; CHECK-LE-P8-NEXT: mtvsrd v3, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: mtvsrd v2, r4
+; CHECK-LE-P9-NEXT: mtvsrd v3, r3
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r4, r4, 56
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtvsrd v2, r4
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r4, r4, 56
-; CHECK-BE-P9-NEXT: sldi r3, r3, 48
-; CHECK-BE-P9-NEXT: mtvsrd v2, r4
-; CHECK-BE-P9-NEXT: mtvsrd v3, r3
-; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -472,7 +468,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
@@ -481,7 +477,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
@@ -545,7 +541,7 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C6(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
@@ -580,53 +576,54 @@ entry:
define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) {
; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrwz v3, r4
+; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: mtvsrws v3, r4
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r3
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r4
+; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 56
-; CHECK-BE-P8-NEXT: mtvsrd v2, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 32
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 56
-; CHECK-BE-P9-NEXT: mtvsrws v3, r4
-; CHECK-BE-P9-NEXT: mtvsrd v2, r3
-; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -660,53 +657,54 @@ entry:
define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: mtvsrd v2, r4
+; CHECK-LE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: mtvsrws v3, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: mtvsrd v2, r4
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r4, r4, 56
-; CHECK-BE-P8-NEXT: sldi r3, r3, 32
-; CHECK-BE-P8-NEXT: mtvsrd v2, r4
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI8_0 at toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI8_0 at toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r4, r4, 56
-; CHECK-BE-P9-NEXT: mtvsrws v3, r3
-; CHECK-BE-P9-NEXT: mtvsrd v2, r4
-; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI8_0 at toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI8_0 at toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r3
-; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -781,9 +779,9 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C9(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.1
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3
@@ -875,7 +873,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4
; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C11(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7
@@ -885,7 +883,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7
@@ -928,20 +926,16 @@ entry:
define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) {
; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v3, r4
+; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r3
+; CHECK-LE-P9-NEXT: mtvsrd v3, r4
+; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -1007,20 +1001,16 @@ entry:
define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: mtvsrd v2, r4
+; CHECK-LE-P8-NEXT: mtvsrd v3, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: mtvsrd v2, r4
+; CHECK-LE-P9-NEXT: mtvsrd v3, r3
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -1392,7 +1382,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C12(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r5
@@ -1401,7 +1391,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r5, L..C8(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r3
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r5)
@@ -1439,53 +1429,54 @@ entry:
define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r7
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r8
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r7
+; CHECK-LE-P8-NEXT: mtvsrwz v3, r8
+; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r7
-; CHECK-LE-P9-NEXT: mtvsrws v3, r8
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r7
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r8
+; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r7, 48
-; CHECK-BE-P8-NEXT: mtvsrd v2, r3
-; CHECK-BE-P8-NEXT: sldi r3, r8, 32
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI17_0 at toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r7
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r8
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI17_0 at toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r7, 48
-; CHECK-BE-P9-NEXT: mtvsrws v3, r8
-; CHECK-BE-P9-NEXT: mtvsrd v2, r3
-; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI17_0 at toc@ha
+; CHECK-BE-P9-NEXT: mtfprwz f0, r7
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r8
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI17_0 at toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C13(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1519,20 +1510,16 @@ entry:
define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r7
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r8
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r7
+; CHECK-LE-P8-NEXT: mtvsrd v3, r8
+; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r7
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r8
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r7
+; CHECK-LE-P9-NEXT: mtvsrd v3, r8
+; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1669,53 +1656,54 @@ entry:
define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v3, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: mtvsrws v2, r3
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrwz v2, r3
+; CHECK-LE-P9-NEXT: mtvsrd v3, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 32
-; CHECK-BE-P8-NEXT: mtvsrd v2, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 48
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI20_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI20_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: mtvsrws v2, r3
-; CHECK-BE-P9-NEXT: sldi r3, r4, 48
-; CHECK-BE-P9-NEXT: mtvsrd v3, r3
-; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI20_0@toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI20_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C14(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3
-; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1824,18 +1812,15 @@ define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32>
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrglw v2, vs1, vs0
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghw v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxmrglw v2, vs1, vs0
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghw v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1899,20 +1884,16 @@ entry:
define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v3, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd v3, vs0
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: mtvsrd v2, r3
+; CHECK-LE-P9-NEXT: mtvsrd v3, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1978,27 +1959,23 @@ entry:
define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI24_0@toc@ha
-; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI24_0@toc@l
-; CHECK-LE-P8-NEXT: xxswapd v2, f0
-; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd v4, vs0
-; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v4, 0, r4
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI24_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha
-; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l
-; CHECK-LE-P9-NEXT: xxswapd v2, f0
; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm v2, v3, vs0
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l
+; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -2026,7 +2003,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C15(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
@@ -2036,7 +2013,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C11(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 56c8c128ba9f40..fcfcda586694d5 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -30,42 +30,42 @@ define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: mtfprwz f1, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: mtfprwz f1, r4
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
@@ -102,42 +102,42 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_none_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_none_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_none_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
@@ -170,42 +170,42 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_none:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_none:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_none:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
@@ -237,54 +237,42 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 56
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 48
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 56
-; CHECK-BE-P9-NEXT: mtfprd f0, r3
-; CHECK-BE-P9-NEXT: sldi r3, r4, 48
-; CHECK-BE-P9-NEXT: mtfprd f1, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -320,54 +308,42 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 56
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 48
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 56
-; CHECK-BE-P9-NEXT: mtfprd f0, r3
-; CHECK-BE-P9-NEXT: sldi r3, r4, 48
-; CHECK-BE-P9-NEXT: mtfprd f1, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs1, vs0
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -404,42 +380,42 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b)
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_none:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_none:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_none:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
@@ -472,42 +448,42 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b)
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_none_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_none_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_none_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
@@ -538,52 +514,43 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 56
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 32
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 56
-; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT: mtfprd f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -618,52 +585,43 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 56
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 32
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 56
-; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT: mtfprd f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs1, vs0
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -700,42 +658,42 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b)
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprwz f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprwz f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_none_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -768,42 +726,42 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b)
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: mtfprwz f0, r3
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_none:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
; CHECK-LE-P9-NEXT: mtfprwz f0, r3
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_none:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_none:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
@@ -835,18 +793,14 @@ define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd v2, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxswapd v2, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -923,18 +877,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd v2, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxswapd v2, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -942,14 +892,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
; CHECK-BE-P8-NEXT: mtfprd f0, r4
; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: mtvsrdd v2, r4, r4
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
@@ -957,14 +907,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4
; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r4, r4
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
@@ -1159,42 +1109,42 @@ define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: mtfprwz f1, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: mtfprwz f1, r4
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
@@ -1229,52 +1179,43 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 32
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: sldi r3, r3, 48
-; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT: mtfprd f0, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1310,18 +1251,14 @@ define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd v2, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxswapd v2, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1398,42 +1335,42 @@ define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprwz f0, r3
; CHECK-LE-P8-NEXT: mtfprwz f1, r4
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: mtfprwz f1, r4
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: mtfprwz f0, r3
; CHECK-BE-P8-NEXT: mtfprwz f1, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: mtfprwz f1, r4
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
@@ -1467,53 +1404,44 @@ entry:
define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: sldi r3, r3, 32
-; CHECK-BE-P8-NEXT: mtfprd f0, r3
-; CHECK-BE-P8-NEXT: sldi r3, r4, 48
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: mtvsrws vs0, r3
-; CHECK-BE-P9-NEXT: sldi r3, r4, 48
-; CHECK-BE-P9-NEXT: mtfprd f1, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3
-; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1547,19 +1475,16 @@ entry:
define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
; CHECK-LE-P8-NEXT: mtfprd f1, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxswapd v2, vs1
-; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: mtfprd f1, r4
-; CHECK-LE-P9-NEXT: mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs1
-; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -1730,18 +1655,15 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtvsrws vs0, r4
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1749,14 +1671,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
; CHECK-BE-P8-NEXT: mtfprd f0, r3
; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
; CHECK-BE-P8-NEXT: mtfprwz f0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r4
; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
@@ -1764,14 +1686,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
@@ -1813,19 +1735,15 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: mtfprd f0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, vs0
-; CHECK-LE-P9-NEXT: mtfprd f0, r4
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1833,14 +1751,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
; CHECK-BE-P8-NEXT: mtfprd f0, r3
; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
; CHECK-BE-P8-NEXT: mtfprwz f0, r4
-; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: mtfprwz f0, r4
; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
@@ -1848,14 +1766,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index c8e7b20e4b8c37..402a4f34e62b24 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -28,15 +28,11 @@
define void @test_none_v8i16(ptr %a) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P8-NEXT: lxsdx v4, 0, r3
-; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT: mtvsrd v3, r4
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: vperm v2, v3, v4, v2
-; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: lfdx f1, 0, r3
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stfdx f0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
@@ -44,11 +40,8 @@ define void @test_none_v8i16(ptr %a) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT: lfd f1, 0(r3)
-; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm vs1, vs0, vs2
-; CHECK-LE-P9-NEXT: xxswapd vs0, vs1
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
; CHECK-LE-P9-NEXT: stfd f0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -131,11 +124,15 @@ define void @test_v8i16_none(ptr %a) {
; CHECK-LE-P8-LABEL: test_v8i16_none:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT: mtfprd f1, r3
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: mtvsrd v4, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
@@ -143,59 +140,76 @@ define void @test_v8i16_none(ptr %a) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_none:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-BE-P8-NEXT: mtfprwz f0, r4
-; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_none:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-BE-P9-NEXT: stxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-AIX-64-P9-NEXT: stxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr undef, align 1
@@ -264,7 +278,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-AIX-64-P8-NEXT: mffprwz r4, f0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
@@ -275,7 +289,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-64-P9-NEXT: li r4, 0
; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
-; CHECK-AIX-64-P9-NEXT: ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1
; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
@@ -286,7 +300,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
@@ -297,7 +311,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P9-NEXT: lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
@@ -369,7 +383,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-AIX-64-P8-NEXT: mffprwz r4, f0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C2(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4
; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
@@ -380,7 +394,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-64-P9-NEXT: li r4, 0
; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
-; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r4, L..C2(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
@@ -391,7 +405,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
@@ -402,7 +416,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r4, L..C2(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
@@ -474,12 +488,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
;
; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.1
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
@@ -487,7 +501,7 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs1, 0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
@@ -497,12 +511,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT: lxvw4x v5, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1
; CHECK-AIX-32-P8-NEXT: vperm v2, v5, v2, v4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
@@ -511,7 +525,7 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: mtfprwz f1, r4
; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs1, 0
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
@@ -595,7 +609,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
; CHECK-LE-P8-NEXT: mtfprd f0, r4
; CHECK-LE-P8-NEXT: mtfprd f1, r3
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
@@ -604,7 +618,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT: lxsihzx f1, 0, r3
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -614,7 +628,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
; CHECK-BE-P8-NEXT: mtfprwz f0, r4
; CHECK-BE-P8-NEXT: mtfprwz f1, r3
-; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT: blr
;
@@ -622,7 +636,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT: lxsihzx f1, 0, r3
-; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
@@ -632,7 +646,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r3
-; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT: blr
;
@@ -640,7 +654,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT: lxsihzx f1, 0, r3
-; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
@@ -650,7 +664,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4
; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
@@ -658,7 +672,7 @@ define void @test_v8i16_v8i16(ptr %a) {
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT: lxsihzx f1, 0, r3
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: xxmrglw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
@@ -681,85 +695,82 @@ define void @test_v8i16_v4i32(ptr %a) {
; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0
-; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0
-; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-BE-P9-NEXT: stxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0
-; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0
-; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-AIX-64-P9-NEXT: stxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
-; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr undef, align 1
@@ -780,20 +791,16 @@ define void @test_v8i16_v2i64(ptr %a) {
; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
; CHECK-LE-P8-NEXT: lfdx f1, 0, r3
; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lfd f1, 0(r3)
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -838,23 +845,22 @@ define void @test_v8i16_v2i64(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
-; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr undef, align 1
@@ -914,7 +920,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
@@ -923,7 +929,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
@@ -932,7 +938,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C7(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
@@ -941,7 +947,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C6(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
@@ -960,84 +966,81 @@ define void @test_v4i32_v8i16(ptr %a) {
; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtfprd f1, r3
-; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
@@ -1058,9 +1061,7 @@ define void @test_v4i32_v2i64(ptr %a) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd vs0, f0
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
@@ -1069,9 +1070,7 @@ define void @test_v4i32_v2i64(ptr %a) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: xxswapd vs1, f1
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -1227,9 +1226,7 @@ define void @test_v2i64_v4i32(ptr %a) {
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd vs0, f0
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
@@ -1238,9 +1235,7 @@ define void @test_v2i64_v4i32(ptr %a) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: xxswapd vs1, f1
-; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -1315,20 +1310,16 @@ define void @test_v2i64_v8i16(ptr %a) {
; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
; CHECK-LE-P8-NEXT: lfdx f1, 0, r3
; CHECK-LE-P8-NEXT: mtfprd f0, r4
-; CHECK-LE-P8-NEXT: xxswapd vs1, f1
-; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
-; CHECK-LE-P9-NEXT: xxswapd vs0, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lfd f1, 0(r3)
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -1373,22 +1364,21 @@ define void @test_v2i64_v8i16(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
-; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index e1aa531db449e5..47ffdb4625ed39 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -241,16 +241,13 @@ entry:
define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 {
; CHECK-LE-P8-LABEL: test_none_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r5
; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P8-NEXT: mtvsrd v3, r5
; CHECK-LE-P8-NEXT: lxsiwzx v4, 0, r3
; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha
-; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v2
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT: xxswapd vs0, v2
@@ -261,13 +258,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P9-NEXT: mtvsrd v3, r5
+; CHECK-LE-P9-NEXT: mtfprd f0, r5
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha
-; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm v3, v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v3, v3, v3
; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P9-NEXT: xxswapd vs0, v2
; CHECK-LE-P9-NEXT: stfd f0, 0(r3)
@@ -275,15 +270,13 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
;
; CHECK-BE-P8-LABEL: test_none_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-BE-P8-NEXT: mtvsrwz v2, r5
-; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT: vperm v2, v2, v2, v3
+; CHECK-BE-P8-NEXT: sldi r4, r5, 56
; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3
-; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_1@toc@ha
-; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrd v2, r4
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v2
; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT: stxsdx v2, 0, r3
; CHECK-BE-P8-NEXT: blr
@@ -291,27 +284,24 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: sldi r3, r5, 56
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-BE-P9-NEXT: mtvsrwz v3, r5
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha
-; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-BE-P9-NEXT: vmrghh v3, v3, v3
; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT: xxperm v3, v3, vs0
; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P9-NEXT: stxsd v2, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r5
-; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: sldi r4, r5, 56
; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v2
; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT: stxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT: blr
@@ -319,12 +309,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r5, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r5
-; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P9-NEXT: vmrghh v3, v3, v3
; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT: xxperm v3, v3, vs0
; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P9-NEXT: stxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
@@ -421,7 +410,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
@@ -431,7 +420,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs2, vs1
@@ -531,7 +520,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
@@ -543,7 +532,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
@@ -635,7 +624,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4
; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
@@ -645,7 +634,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs2, vs1
@@ -739,7 +728,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r4)
; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
@@ -747,7 +736,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
@@ -784,78 +773,75 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1
-; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr %a
@@ -874,20 +860,16 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -927,21 +909,20 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr %a
@@ -1045,7 +1026,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C10(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
@@ -1057,7 +1038,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
@@ -1078,78 +1059,75 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-BE-P8-NEXT: sldi r3, r3, 48
-; CHECK-BE-P8-NEXT: mtvsrd v3, r3
-; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1
-; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C11(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr %a
@@ -1167,20 +1145,16 @@ entry:
define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, f0
-; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, f0
-; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -1218,7 +1192,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C12(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
@@ -1227,7 +1201,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C11(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
@@ -1310,7 +1284,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P8: # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C11(r2) # %const.0
; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
@@ -1322,7 +1296,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C10(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
@@ -1334,7 +1308,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C13(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
@@ -1346,7 +1320,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C12(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
@@ -1366,20 +1340,16 @@ entry:
define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, f0
-; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT: xxswapd v2, f0
-; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1417,7 +1387,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C11(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C14(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
@@ -1426,7 +1396,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C13(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
@@ -1448,20 +1418,16 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT: mtfprd f0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, vs0
-; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT: xxswapd v3, f0
-; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1501,21 +1467,20 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
-; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C15(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C14(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = load <2 x i8>, ptr %a
>From 9615c3a9ddd0c3ace18713da9d046b69f962e9d5 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 5 Apr 2024 10:50:47 -0500
Subject: [PATCH 2/6] Address coding guideline comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4ffa47dc7290a9..a1ba62b98590cc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15691,13 +15691,13 @@ static void fixupShuffleMaskForPermutedSToV(
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
- for (int i = 0, e = ShuffV.size(); i < e; i++) {
- int Idx = ShuffV[i];
+ for (int I = 0, E = ShuffV.size(); I < E; ++I) {
+ int Idx = ShuffV[I];
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
- ShuffV[i] +=
+ ShuffV[I] +=
Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
- ShuffV[i] +=
+ ShuffV[I] +=
Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
}
}
@@ -15740,8 +15740,8 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
int HalfVec, int LHSLastElementDefined,
int RHSLastElementDefined) {
- for (int i : seq<int>(0, ShuffV.size())) {
- int Index = ShuffV[i];
+ for (int I : seq<int>(0, ShuffV.size())) {
+ int Index = ShuffV[I];
if (Index < 0) // Skip explicitly undefined mask indices.
continue;
// Handle first input vector of the vector_shuffle.
>From 92906f067f260d846b8ddce4dd2895521a5b3b4b Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Thu, 18 Apr 2024 10:41:08 -0500
Subject: [PATCH 3/6] Save frequent calculations into separate variables, and
pull out common code into a static function.
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 66 ++++++++++++---------
1 file changed, 37 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a1ba62b98590cc..c902d8a52a3cc1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15691,14 +15691,16 @@ static void fixupShuffleMaskForPermutedSToV(
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
+ int LHSEltFixup =
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
+ int RHSEltFixup =
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
int Idx = ShuffV[I];
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
- ShuffV[I] +=
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
+ ShuffV[I] += LHSEltFixup;
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
- ShuffV[I] +=
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
+ ShuffV[I] += RHSEltFixup;
}
}
@@ -15756,6 +15758,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
return true;
}
+static SDValue generateSToVPermutedForVecShuffle(
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
+ // Set up the values for the shuffle vector fixup.
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
+ // The last element depends on whether the input comes from the LHS or RHS.
+ //
+ // For example:
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+ //
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
+ // because elements 1 and higher of a scalar_to_vector are undefined.
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
+ // because elements 1 and higher of a scalar_to_vector are undefined.
+ // It is also not 4 because the original scalar_to_vector is wider and
+ // actually contains two i32 elements.
+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
+ return SToVPermuted;
+}
+
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
@@ -15833,36 +15860,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && LHSScalarSize >= 64)
return Res;
- // Set up the values for the shuffle vector fixup.
- LHSNumValidElts =
- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
- // The last element that comes from the LHS. For example:
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
- // The last element that comes from the LHS is actually 0, not 3
- // because elements 1 and higher of a scalar_to_vector are undefined.
- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
- if (SToVLHS.getValueType() != LHS.getValueType())
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
- LHS = SToVLHS;
+ LHS = generateSToVPermutedForVecShuffle(
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
}
if (SToVRHS) {
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && RHSScalarSize >= 64)
return Res;
- RHSNumValidElts =
- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
- // The last element that comes from the RHS. For example:
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
- // The last element that comes from the RHS is actually 5, not 7
- // because elements 1 and higher of a scalar_to_vector are undefined.
- // It is also not 4 because the original scalar_to_vector is wider and
- // actually contains two i32 elements.
- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
- if (SToVRHS.getValueType() != RHS.getValueType())
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
- RHS = SToVRHS;
+ RHS = generateSToVPermutedForVecShuffle(
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
}
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
>From fc931e40b66272245c18ceed325daf565bfd3351 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Mon, 4 Nov 2024 15:32:27 -0600
Subject: [PATCH 4/6] Addressed review comments from Kai and Digger
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++++++++++--------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c902d8a52a3cc1..2c2fde084fe3c0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15742,8 +15742,7 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
int HalfVec, int LHSLastElementDefined,
int RHSLastElementDefined) {
- for (int I : seq<int>(0, ShuffV.size())) {
- int Index = ShuffV[I];
+ for (int Index : ShuffV) {
if (Index < 0) // Skip explicitly undefined mask indices.
continue;
// Handle first input vector of the vector_shuffle.
@@ -15776,7 +15775,9 @@ static SDValue generateSToVPermutedForVecShuffle(
// because elements 1 and higher of a scalar_to_vector are undefined.
// It is also not 4 because the original scalar_to_vector is wider and
// actually contains two i32 elements.
- LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
+ LastElt = (uint64_t) ScalarSize > ShuffleEltWidth
+ ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
+ : FirstElt;
SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
if (SToVPermuted.getValueType() != VecShuffOperandType)
SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
@@ -15856,22 +15857,26 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
// than 64 bits since for 64-bit elements, all instructions already put
// the value into element zero. Since scalar size of LHS and RHS may differ
// after isScalarToVec, this should be checked using their own sizes.
+ int LHSScalarSize = 0;
+ int RHSScalarSize = 0;
if (SToVLHS) {
- int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
+ LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && LHSScalarSize >= 64)
return Res;
- LHS = generateSToVPermutedForVecShuffle(
- LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
- LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
}
if (SToVRHS) {
- int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
+ RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && RHSScalarSize >= 64)
return Res;
+ }
+ if (LHSScalarSize != 0)
+ LHS = generateSToVPermutedForVecShuffle(
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
+ if (RHSScalarSize != 0)
RHS = generateSToVPermutedForVecShuffle(
RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
- }
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
return Res;
>From e122832d335b6487902de42d1fe54c755d937746 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Mon, 4 Nov 2024 19:33:17 -0600
Subject: [PATCH 5/6] Fix clang-format on a line in
generateSToVPermutedForVecShuffle()
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c2fde084fe3c0..4ef767e03ea2de 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15775,7 +15775,7 @@ static SDValue generateSToVPermutedForVecShuffle(
// because elements 1 and higher of a scalar_to_vector are undefined.
// It is also not 4 because the original scalar_to_vector is wider and
// actually contains two i32 elements.
- LastElt = (uint64_t) ScalarSize > ShuffleEltWidth
+ LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
? ScalarSize / ShuffleEltWidth - 1 + FirstElt
: FirstElt;
SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
>From 8b196580f9fbe281173804387c54dd1e8674c17d Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Thu, 7 Nov 2024 15:22:09 -0600
Subject: [PATCH 6/6] Address nit of adding else if
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4ef767e03ea2de..7b26d00eb3d0b9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15699,7 +15699,7 @@ static void fixupShuffleMaskForPermutedSToV(
int Idx = ShuffV[I];
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
ShuffV[I] += LHSEltFixup;
- if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
+ else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
ShuffV[I] += RHSEltFixup;
}
}
More information about the llvm-commits mailing list