[llvm] r367516 - recommit:[PowerPC] Eliminate loads/swap feeding swap/store for vector type by using big-endian load/store
Zi Xuan Wu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 22:26:02 PDT 2019
Author: wuzish
Date: Wed Jul 31 22:26:02 2019
New Revision: 367516
URL: http://llvm.org/viewvc/llvm-project?rev=367516&view=rev
Log:
recommit:[PowerPC] Eliminate loads/swap feeding swap/store for vector type by using big-endian load/store
PowerPC has instructions that load a vector in big-endian element order even when running on a little-endian target.
So we can combine a vector load followed by an element reverse into a single big-endian load, which eliminates the swap instruction.
Likewise, combine an element reverse followed by a vector store into a single big-endian store.
Differential Revision: https://reviews.llvm.org/D65063
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
llvm/trunk/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
llvm/trunk/test/CodeGen/PowerPC/vsx_shuffle_le.ll
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed Jul 31 22:26:02 2019
@@ -1118,6 +1118,8 @@ PPCTargetLowering::PPCTargetLowering(con
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
@@ -1352,6 +1354,8 @@ const char *PPCTargetLowering::getTarget
case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
+ case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
+ case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -13113,6 +13117,61 @@ SDValue PPCTargetLowering::combineStoreF
return Val;
}
+SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
+ LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const {
+ assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
+ "Not a reverse memop pattern!");
+
+ auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { // true iff the mask reads N-1, ..., 1, 0
+ auto Mask = SVN->getMask();
+ int i = 0;
+ auto I = Mask.rbegin();
+ auto E = Mask.rend();
+
+ for (; I != E; ++I) {
+ if (*I != i)
+ return false;
+ i++;
+ }
+ return true;
+ };
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = SVN->getValueType(0);
+
+ if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+ return SDValue();
+
+ // Before P9, the PPCVSXSwapRemoval pass adjusts the element order itself
+ // (see the comments in PPCVSXSwapRemoval.cpp). This combine conflicts with
+ // that optimization, so it is skipped unless P9 vector support is present.
+ if (!Subtarget.hasP9Vector())
+ return SDValue();
+
+ if(!IsElementReverse(SVN))
+ return SDValue();
+
+ if (LSBase->getOpcode() == ISD::LOAD) { // reversed load -> LOAD_VEC_BE; NOTE(review): other users of the load are not checked here
+ SDLoc dl(SVN);
+ SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ if (LSBase->getOpcode() == ISD::STORE) { // store of reversed value -> STORE_VEC_BE of the shuffle's first operand
+ SDLoc dl(LSBase);
+ SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
+ LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ llvm_unreachable("Expected a load or store node here");
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -13159,6 +13218,12 @@ SDValue PPCTargetLowering::PerformDAGCom
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
+ case ISD::VECTOR_SHUFFLE:
+ if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
+ LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
+ return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
+ }
+ break;
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
@@ -13169,6 +13234,13 @@ SDValue PPCTargetLowering::PerformDAGCom
if (Val)
return Val;
}
+
+ if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
+ SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
+ if (Val)
+ return Val;
+ }
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Wed Jul 31 22:26:02 2019
@@ -456,6 +456,11 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
+ /// the vector type to load vector in big-endian element order.
+ LOAD_VEC_BE,
+
/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,
@@ -465,6 +470,11 @@ namespace llvm {
/// an xxswapd.
STXVD2X,
+ /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
+ /// the vector type to store vector in big-endian element order.
+ STORE_VEC_BE,
+
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
@@ -1167,6 +1177,8 @@ namespace llvm {
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Wed Jul 31 22:26:02 2019
@@ -78,12 +78,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [
def SDTVabsd : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
]>;
-
+def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
+def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
+ [SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
@@ -1088,6 +1097,19 @@ let Predicates = [HasVSX, HasOnlySwappin
(STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
+
+// Load/store vector in big-endian element order
+let Predicates = [IsLittleEndian, HasVSX] in {
+ def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+}
+
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -3024,6 +3046,16 @@ let AddedComplexity = 400, Predicates =
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+ def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+ def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
Modified: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll Wed Jul 31 22:26:02 2019
@@ -986,11 +986,7 @@ define <4 x i32> @fromDiffMemConsDi(i32*
;
; P9LE-LABEL: fromDiffMemConsDi:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 0(r3)
-; P9LE-NEXT: addis r3, r2, .LCPI8_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI8_0 at toc@l
-; P9LE-NEXT: lxvx v3, 0, r3
-; P9LE-NEXT: vperm v2, v2, v2, v3
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDi:
@@ -2570,11 +2566,7 @@ define <4 x i32> @fromDiffMemConsDui(i32
;
; P9LE-LABEL: fromDiffMemConsDui:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 0(r3)
-; P9LE-NEXT: addis r3, r2, .LCPI41_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI41_0 at toc@l
-; P9LE-NEXT: lxvx v3, 0, r3
-; P9LE-NEXT: vperm v2, v2, v2, v3
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDui:
@@ -4155,8 +4147,8 @@ define <2 x i64> @fromDiffMemConsDll(i64
;
; P9LE-LABEL: fromDiffMemConsDll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 16(r3)
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDll:
@@ -4235,9 +4227,8 @@ define <2 x i64> @fromDiffMemVarDll(i64*
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDll:
@@ -4948,8 +4939,8 @@ define <2 x i64> @fromDiffMemConsDConvdt
;
; P9LE-LABEL: fromDiffMemConsDConvdtoll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv vs0, 16(r3)
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpsxds v2, vs0
; P9LE-NEXT: blr
;
@@ -5040,9 +5031,8 @@ define <2 x i64> @fromDiffMemVarDConvdto
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx vs0, r3, r4
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpsxds v2, vs0
; P9LE-NEXT: blr
;
@@ -5402,8 +5392,8 @@ define <2 x i64> @fromDiffMemConsDull(i6
;
; P9LE-LABEL: fromDiffMemConsDull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 16(r3)
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDull:
@@ -5482,9 +5472,8 @@ define <2 x i64> @fromDiffMemVarDull(i64
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDull:
@@ -6195,8 +6184,8 @@ define <2 x i64> @fromDiffMemConsDConvdt
;
; P9LE-LABEL: fromDiffMemConsDConvdtoull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv vs0, 16(r3)
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpuxds v2, vs0
; P9LE-NEXT: blr
;
@@ -6287,9 +6276,8 @@ define <2 x i64> @fromDiffMemVarDConvdto
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx vs0, r3, r4
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpuxds v2, vs0
; P9LE-NEXT: blr
;
Modified: llvm/trunk/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll Wed Jul 31 22:26:02 2019
@@ -19,8 +19,7 @@ define <2 x i64> @load_swap00(<2 x i64>*
;
; CHECK-P9-LABEL: load_swap00:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: xxswapd v2, v2
+; CHECK-P9-NEXT: lxvd2x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap00:
@@ -48,8 +47,7 @@ define <2 x i64> @load_swap01(<2 x i64>*
;
; CHECK-P9-LABEL: load_swap01:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: xxswapd v2, v2
+; CHECK-P9-NEXT: lxvd2x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap01:
@@ -81,11 +79,7 @@ define <4 x i32> @load_swap10(<4 x i32>*
;
; CHECK-P9-LABEL: load_swap10:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap10:
@@ -123,11 +117,7 @@ define <4 x i32> @load_swap11(<4 x i32>*
;
; CHECK-P9-LABEL: load_swap11:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap11:
@@ -165,11 +155,7 @@ define <8 x i16> @load_swap20(<8 x i16>*
;
; CHECK-P9-LABEL: load_swap20:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvh8x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap20:
@@ -207,11 +193,7 @@ define <8 x i16> @load_swap21(<8 x i16>*
;
; CHECK-P9-LABEL: load_swap21:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvh8x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap21:
@@ -249,8 +231,7 @@ define <16 x i8> @load_swap30(<16 x i8>*
;
; CHECK-P9-LABEL: load_swap30:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-P9-NEXT: xxbrq v2, vs0
+; CHECK-P9-NEXT: lxvb16x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap30:
@@ -285,8 +266,7 @@ define <16 x i8> @load_swap31(<16 x i8>*
;
; CHECK-P9-LABEL: load_swap31:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxbrq v2, vs0
+; CHECK-P9-NEXT: lxvb16x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap31:
@@ -317,8 +297,7 @@ define <2 x double> @load_swap40(<2 x do
;
; CHECK-P9-LABEL: load_swap40:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: lxvd2x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap40:
@@ -350,11 +329,7 @@ define <4 x float> @load_swap50(<4 x flo
;
; CHECK-P9-LABEL: load_swap50:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap50:
@@ -392,11 +367,7 @@ define <4 x float> @load_swap51(<4 x flo
;
; CHECK-P9-LABEL: load_swap51:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI10_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI10_0 at toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap51:
@@ -430,8 +401,7 @@ define void @swap_store00(<2 x i64> %v1,
;
; CHECK-P9-LABEL: swap_store00:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store00:
@@ -458,8 +428,7 @@ define void @swap_store01(<2 x i64> %v1,
;
; CHECK-P9-LABEL: swap_store01:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store01:
@@ -490,11 +459,7 @@ define void @swap_store10(<4 x i32> %v1,
;
; CHECK-P9-LABEL: swap_store10:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI13_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI13_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store10:
@@ -531,11 +496,7 @@ define void @swap_store11(<4 x i32> %v1,
;
; CHECK-P9-LABEL: swap_store11:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0 at toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store11:
@@ -572,11 +533,7 @@ define void @swap_store20(<8 x i16> %v1,
;
; CHECK-P9-LABEL: swap_store20:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvh8x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store20:
@@ -613,11 +570,7 @@ define void @swap_store21(<8 x i16> %v1,
;
; CHECK-P9-LABEL: swap_store21:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0 at toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvh8x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store21:
@@ -654,8 +607,7 @@ define void @swap_store30(<16 x i8> %v1,
;
; CHECK-P9-LABEL: swap_store30:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxbrq vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvb16x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store30:
@@ -689,8 +641,7 @@ define void @swap_store31(<16 x i8> %v1,
;
; CHECK-P9-LABEL: swap_store31:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxbrq vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvb16x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store31:
@@ -720,8 +671,7 @@ define void @swap_store40(<2 x double> %
;
; CHECK-P9-LABEL: swap_store40:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store40:
@@ -748,8 +698,7 @@ define void @swap_store41(<2 x double> %
;
; CHECK-P9-LABEL: swap_store41:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store41:
@@ -780,11 +729,7 @@ define void @swap_store50(<4 x float> %v
;
; CHECK-P9-LABEL: swap_store50:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI21_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI21_0 at toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store50:
@@ -821,11 +766,7 @@ define void @swap_store51(<4 x float> %v
;
; CHECK-P9-LABEL: swap_store51:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI22_0 at toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI22_0 at toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store51:
Modified: llvm/trunk/test/CodeGen/PowerPC/vsx_shuffle_le.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx_shuffle_le.ll?rev=367516&r1=367515&r2=367516&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx_shuffle_le.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx_shuffle_le.ll Wed Jul 31 22:26:02 2019
@@ -85,8 +85,7 @@ define <2 x double> @test10(<2 x double>
; CHECK: lxvd2x 34, 0, 3
; CHECK-P9-LABEL: @test10
-; CHECK-P9: lxv 0, 0(3)
-; CHECK-P9: xxswapd 34, 0
+; CHECK-P9: lxvd2x 34, 0, 3
}
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
@@ -257,8 +256,7 @@ define <2 x double> @test32(<2 x double>
; CHECK: lxvd2x 34, 0, 4
; CHECK-P9-LABEL: @test32
-; CHECK-P9: lxv 0, 0(4)
-; CHECK-P9: xxswapd 34, 0
+; CHECK-P9: lxvd2x 34, 0, 4
}
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
More information about the llvm-commits
mailing list