[llvm] 71be020 - [SelectionDAG][PowerPC] Memset reuse vector element for tail store
Ting Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 5 22:54:50 PDT 2023
Author: Ting Wang
Date: 2023-09-06T01:52:38-04:00
New Revision: 71be020dda2c97c2733e45f4b1003d1c135b3b43
URL: https://github.com/llvm/llvm-project/commit/71be020dda2c97c2733e45f4b1003d1c135b3b43
DIFF: https://github.com/llvm/llvm-project/commit/71be020dda2c97c2733e45f4b1003d1c135b3b43.diff
LOG: [SelectionDAG][PowerPC] Memset reuse vector element for tail store
On PPC there are instructions that store an element from a vector (e.g.
stxsdx/stxsiwx), and these instructions can be leveraged to avoid
materializing the tail constant in memset and constant splat array
initialization.
This patch tries to exploit these opportunities.
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D138883
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstrP10.td
llvm/test/CodeGen/PowerPC/memset-tail.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fe0bce8df329747..12b280d5b1a0bcd 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -845,6 +845,15 @@ class TargetLoweringBase {
return false;
}
+ /// Return true if the target shall perform extract vector element and store
+ /// given that the vector is known to be splat of constant.
+ /// \p Index[out] gives the index of the vector element to be extracted when
+ /// this is true.
+ virtual bool shallExtractConstSplatVectorElementToStore(
+ Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
+ return false;
+ }
+
/// Return true if inserting a scalar into a variable element of an undef
/// vector is more efficiently handled by splatting the scalar instead.
virtual bool shouldSplatInsEltVarIndex(EVT) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 12cb4a5f7f83e7b..30bee510e1e78bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7757,13 +7757,28 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
// If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
+ // the smaller value for free with a truncate or extract vector element and
+ // then store.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
+ unsigned Index;
+ unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+ EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
- else
+ else if (LargestVT.isVector() && !VT.isVector() &&
+ TLI.shallExtractConstSplatVectorElementToStore(
+ LargestVT.getTypeForEVT(*DAG.getContext()),
+ VT.getSizeInBits(), Index) &&
+ TLI.isTypeLegal(SVT) &&
+ LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+ // Target which can combine store(extractelement VectorTy, Idx) can get
+ // the smaller value for free.
+ SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+ Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+ DAG.getVectorIdxConstant(Index, dl));
+ } else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 33b2867b5dd8b88..6bc89891c0dc44d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1635,6 +1635,27 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
+ Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
+ if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
+ return false;
+
+ if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
+ if (VTy->getScalarType()->isIntegerTy()) {
+ // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
+ if (ElemSizeInBits == 32) {
+ Index = Subtarget.isLittleEndian() ? 2 : 1;
+ return true;
+ }
+ if (ElemSizeInBits == 64) {
+ Index = Subtarget.isLittleEndian() ? 1 : 0;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -17086,10 +17107,20 @@ EVT PPCTargetLowering::getOptimalMemOpType(
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Op.size() >= 16 &&
- (Op.isAligned(Align(16)) ||
- ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
- return MVT::v4i32;
+ if (Subtarget.hasAltivec() && Op.size() >= 16) {
+ if (Op.isMemset() && Subtarget.hasVSX()) {
+ uint64_t TailSize = Op.size() % 16;
+ // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
+ // element if vector element type matches tail store. For tail size
+ // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
+ if (TailSize > 2 && TailSize <= 4) {
+ return MVT::v8i16;
+ }
+ return MVT::v4i32;
+ }
+ if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
+ return MVT::v4i32;
+ }
}
if (Subtarget.isPPC64()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6ef2d607ff9d3bc..8cbefbdb917359e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -791,6 +791,11 @@ namespace llvm {
return true;
}
+ bool
+ shallExtractConstSplatVectorElementToStore(Type *VectorTy,
+ unsigned ElemSizeInBits,
+ unsigned &Index) const override;
+
bool isCtlzFast() const override {
return true;
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index fdfb762eec13e82..a5429b38dfbe2d6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2031,8 +2031,15 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
(v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
(v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
}
+let Predicates = [IsISA3_1, IsBigEndian] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
+}
+
// FIXME: The swap is overkill when the shift amount is a constant.
// We should just fix the constant in the DAG.
let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll
index 4c94d543569122e..ae14768219a9287 100644
--- a/llvm/test/CodeGen/PowerPC/memset-tail.ll
+++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll
@@ -169,59 +169,45 @@ define dso_local void @memsetTailV1B8(ptr nocapture noundef writeonly %p) local_
; P8-BE-LABEL: memsetTailV1B8:
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: vspltisb 2, 15
-; P8-BE-NEXT: lis 4, 3855
-; P8-BE-NEXT: ori 4, 4, 3855
-; P8-BE-NEXT: rldimi 4, 4, 32, 0
+; P8-BE-NEXT: li 4, 16
+; P8-BE-NEXT: stxsdx 34, 3, 4
; P8-BE-NEXT: stxvw4x 34, 0, 3
-; P8-BE-NEXT: std 4, 16(3)
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memsetTailV1B8:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, 3855
; P9-BE-NEXT: xxspltib 0, 15
-; P9-BE-NEXT: ori 4, 4, 3855
; P9-BE-NEXT: stxv 0, 0(3)
-; P9-BE-NEXT: rldimi 4, 4, 32, 0
-; P9-BE-NEXT: std 4, 16(3)
+; P9-BE-NEXT: stfd 0, 16(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B8:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 252645135
-; P10-BE-NEXT: rldimi 4, 4, 32, 0
-; P10-BE-NEXT: std 4, 16(3)
; P10-BE-NEXT: xxspltib 0, 15
; P10-BE-NEXT: stxv 0, 0(3)
+; P10-BE-NEXT: stfd 0, 16(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memsetTailV1B8:
; P8-LE: # %bb.0: # %entry
-; P8-LE-NEXT: lis 4, 3855
; P8-LE-NEXT: vspltisb 2, 15
-; P8-LE-NEXT: ori 4, 4, 3855
-; P8-LE-NEXT: rldimi 4, 4, 32, 0
-; P8-LE-NEXT: std 4, 16(3)
+; P8-LE-NEXT: li 4, 16
+; P8-LE-NEXT: stxsdx 34, 3, 4
; P8-LE-NEXT: stxvd2x 34, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memsetTailV1B8:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, 3855
; P9-LE-NEXT: xxspltib 0, 15
-; P9-LE-NEXT: ori 4, 4, 3855
; P9-LE-NEXT: stxv 0, 0(3)
-; P9-LE-NEXT: rldimi 4, 4, 32, 0
-; P9-LE-NEXT: std 4, 16(3)
+; P9-LE-NEXT: stfd 0, 16(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B8:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 252645135
-; P10-LE-NEXT: rldimi 4, 4, 32, 0
-; P10-LE-NEXT: std 4, 16(3)
; P10-LE-NEXT: xxspltib 0, 15
; P10-LE-NEXT: stxv 0, 0(3)
+; P10-LE-NEXT: stfd 0, 16(3)
; P10-LE-NEXT: blr
entry:
tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 24, i1 false)
@@ -231,63 +217,45 @@ entry:
define dso_local void @memsetTailV1B7(ptr nocapture noundef writeonly %p) local_unnamed_addr {
; P8-BE-LABEL: memsetTailV1B7:
; P8-BE: # %bb.0: # %entry
-; P8-BE-NEXT: lis 4, 3855
; P8-BE-NEXT: vspltisb 2, 15
-; P8-BE-NEXT: li 5, 15
-; P8-BE-NEXT: ori 4, 4, 3855
-; P8-BE-NEXT: rldimi 4, 4, 32, 0
-; P8-BE-NEXT: stdx 4, 3, 5
+; P8-BE-NEXT: li 4, 15
+; P8-BE-NEXT: stxsdx 34, 3, 4
; P8-BE-NEXT: stxvw4x 34, 0, 3
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memsetTailV1B7:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, 3855
-; P9-BE-NEXT: li 5, 15
-; P9-BE-NEXT: ori 4, 4, 3855
-; P9-BE-NEXT: rldimi 4, 4, 32, 0
-; P9-BE-NEXT: stdx 4, 3, 5
; P9-BE-NEXT: xxspltib 0, 15
+; P9-BE-NEXT: stfd 0, 15(3)
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B7:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 252645135
-; P10-BE-NEXT: rldimi 4, 4, 32, 0
-; P10-BE-NEXT: pstd 4, 15(3), 0
; P10-BE-NEXT: xxspltib 0, 15
+; P10-BE-NEXT: stfd 0, 15(3)
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memsetTailV1B7:
; P8-LE: # %bb.0: # %entry
-; P8-LE-NEXT: lis 4, 3855
; P8-LE-NEXT: vspltisb 2, 15
-; P8-LE-NEXT: li 5, 15
-; P8-LE-NEXT: ori 4, 4, 3855
-; P8-LE-NEXT: rldimi 4, 4, 32, 0
-; P8-LE-NEXT: stdx 4, 3, 5
+; P8-LE-NEXT: li 4, 15
+; P8-LE-NEXT: stxsdx 34, 3, 4
; P8-LE-NEXT: stxvd2x 34, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memsetTailV1B7:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, 3855
-; P9-LE-NEXT: li 5, 15
-; P9-LE-NEXT: ori 4, 4, 3855
-; P9-LE-NEXT: rldimi 4, 4, 32, 0
-; P9-LE-NEXT: stdx 4, 3, 5
; P9-LE-NEXT: xxspltib 0, 15
+; P9-LE-NEXT: stfd 0, 15(3)
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B7:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 252645135
-; P10-LE-NEXT: rldimi 4, 4, 32, 0
-; P10-LE-NEXT: pstd 4, 15(3), 0
; P10-LE-NEXT: xxspltib 0, 15
+; P10-LE-NEXT: stfd 0, 15(3)
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -299,52 +267,48 @@ define dso_local void @memsetTailV1B4(ptr nocapture noundef writeonly %p) local_
; P8-BE-LABEL: memsetTailV1B4:
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: vspltisb 2, 15
-; P8-BE-NEXT: lis 4, 3855
-; P8-BE-NEXT: ori 4, 4, 3855
-; P8-BE-NEXT: stw 4, 16(3)
+; P8-BE-NEXT: li 4, 16
+; P8-BE-NEXT: stxsiwx 34, 3, 4
; P8-BE-NEXT: stxvw4x 34, 0, 3
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memsetTailV1B4:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, 3855
-; P9-BE-NEXT: ori 4, 4, 3855
-; P9-BE-NEXT: stw 4, 16(3)
; P9-BE-NEXT: xxspltib 0, 15
+; P9-BE-NEXT: li 4, 16
+; P9-BE-NEXT: stfiwx 0, 3, 4
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B4:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 252645135
-; P10-BE-NEXT: stw 4, 16(3)
; P10-BE-NEXT: xxspltib 0, 15
+; P10-BE-NEXT: li 4, 16
+; P10-BE-NEXT: stfiwx 0, 3, 4
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memsetTailV1B4:
; P8-LE: # %bb.0: # %entry
; P8-LE-NEXT: vspltisb 2, 15
-; P8-LE-NEXT: lis 4, 3855
-; P8-LE-NEXT: ori 4, 4, 3855
-; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: li 4, 16
+; P8-LE-NEXT: stxsiwx 34, 3, 4
; P8-LE-NEXT: stxvd2x 34, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memsetTailV1B4:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, 3855
-; P9-LE-NEXT: ori 4, 4, 3855
-; P9-LE-NEXT: stw 4, 16(3)
; P9-LE-NEXT: xxspltib 0, 15
+; P9-LE-NEXT: li 4, 16
+; P9-LE-NEXT: stfiwx 0, 3, 4
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B4:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 252645135
-; P10-LE-NEXT: stw 4, 16(3)
; P10-LE-NEXT: xxspltib 0, 15
+; P10-LE-NEXT: li 4, 16
+; P10-LE-NEXT: stfiwx 0, 3, 4
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -356,52 +320,48 @@ define dso_local void @memsetTailV1B3(ptr nocapture noundef writeonly %p) local_
; P8-BE-LABEL: memsetTailV1B3:
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: vspltisb 2, 15
-; P8-BE-NEXT: lis 4, 3855
-; P8-BE-NEXT: ori 4, 4, 3855
+; P8-BE-NEXT: li 4, 15
+; P8-BE-NEXT: stxsiwx 34, 3, 4
; P8-BE-NEXT: stxvw4x 34, 0, 3
-; P8-BE-NEXT: stw 4, 15(3)
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memsetTailV1B3:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, 3855
-; P9-BE-NEXT: ori 4, 4, 3855
-; P9-BE-NEXT: stw 4, 15(3)
; P9-BE-NEXT: xxspltib 0, 15
+; P9-BE-NEXT: li 4, 15
+; P9-BE-NEXT: stfiwx 0, 3, 4
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B3:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 252645135
-; P10-BE-NEXT: stw 4, 15(3)
; P10-BE-NEXT: xxspltib 0, 15
+; P10-BE-NEXT: li 4, 15
+; P10-BE-NEXT: stfiwx 0, 3, 4
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memsetTailV1B3:
; P8-LE: # %bb.0: # %entry
; P8-LE-NEXT: vspltisb 2, 15
-; P8-LE-NEXT: lis 4, 3855
-; P8-LE-NEXT: ori 4, 4, 3855
-; P8-LE-NEXT: stw 4, 15(3)
+; P8-LE-NEXT: li 4, 15
+; P8-LE-NEXT: stxsiwx 34, 3, 4
; P8-LE-NEXT: stxvd2x 34, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memsetTailV1B3:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, 3855
-; P9-LE-NEXT: ori 4, 4, 3855
-; P9-LE-NEXT: stw 4, 15(3)
; P9-LE-NEXT: xxspltib 0, 15
+; P9-LE-NEXT: li 4, 15
+; P9-LE-NEXT: stfiwx 0, 3, 4
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B3:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 252645135
-; P10-LE-NEXT: stw 4, 15(3)
; P10-LE-NEXT: xxspltib 0, 15
+; P10-LE-NEXT: li 4, 15
+; P10-LE-NEXT: stfiwx 0, 3, 4
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -682,30 +642,22 @@ define dso_local void @memset2TailV1B8(ptr nocapture noundef writeonly %p) local
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: ld 4, L..C3(2) # %const.0
; P8-BE-NEXT: lxvw4x 0, 0, 4
-; P8-BE-NEXT: lis 4, -23131
-; P8-BE-NEXT: ori 4, 4, 42405
-; P8-BE-NEXT: rldimi 4, 4, 32, 0
+; P8-BE-NEXT: stfd 0, 16(3)
; P8-BE-NEXT: stxvw4x 0, 0, 3
-; P8-BE-NEXT: std 4, 16(3)
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memset2TailV1B8:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, -23131
; P9-BE-NEXT: xxspltib 0, 165
-; P9-BE-NEXT: ori 4, 4, 42405
; P9-BE-NEXT: stxv 0, 0(3)
-; P9-BE-NEXT: rldimi 4, 4, 32, 0
-; P9-BE-NEXT: std 4, 16(3)
+; P9-BE-NEXT: stfd 0, 16(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B8:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 2779096485
-; P10-BE-NEXT: rldimi 4, 4, 32, 0
-; P10-BE-NEXT: std 4, 16(3)
; P10-BE-NEXT: xxspltib 0, 165
; P10-BE-NEXT: stxv 0, 0(3)
+; P10-BE-NEXT: stfd 0, 16(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memset2TailV1B8:
@@ -713,30 +665,22 @@ define dso_local void @memset2TailV1B8(ptr nocapture noundef writeonly %p) local
; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha
; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
-; P8-LE-NEXT: lis 4, -23131
-; P8-LE-NEXT: ori 4, 4, 42405
-; P8-LE-NEXT: rldimi 4, 4, 32, 0
-; P8-LE-NEXT: std 4, 16(3)
+; P8-LE-NEXT: stfd 0, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memset2TailV1B8:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, -23131
; P9-LE-NEXT: xxspltib 0, 165
-; P9-LE-NEXT: ori 4, 4, 42405
; P9-LE-NEXT: stxv 0, 0(3)
-; P9-LE-NEXT: rldimi 4, 4, 32, 0
-; P9-LE-NEXT: std 4, 16(3)
+; P9-LE-NEXT: stfd 0, 16(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B8:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 2779096485
-; P10-LE-NEXT: rldimi 4, 4, 32, 0
-; P10-LE-NEXT: std 4, 16(3)
; P10-LE-NEXT: xxspltib 0, 165
; P10-LE-NEXT: stxv 0, 0(3)
+; P10-LE-NEXT: stfd 0, 16(3)
; P10-LE-NEXT: blr
entry:
tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 24, i1 false)
@@ -747,65 +691,45 @@ define dso_local void @memset2TailV1B7(ptr nocapture noundef writeonly %p) local
; P8-BE-LABEL: memset2TailV1B7:
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: ld 4, L..C4(2) # %const.0
-; P8-BE-NEXT: lis 5, -23131
; P8-BE-NEXT: lxvw4x 0, 0, 4
-; P8-BE-NEXT: ori 4, 5, 42405
-; P8-BE-NEXT: li 5, 15
-; P8-BE-NEXT: rldimi 4, 4, 32, 0
-; P8-BE-NEXT: stdx 4, 3, 5
+; P8-BE-NEXT: stfd 0, 15(3)
; P8-BE-NEXT: stxvw4x 0, 0, 3
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memset2TailV1B7:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, -23131
-; P9-BE-NEXT: li 5, 15
-; P9-BE-NEXT: ori 4, 4, 42405
-; P9-BE-NEXT: rldimi 4, 4, 32, 0
-; P9-BE-NEXT: stdx 4, 3, 5
; P9-BE-NEXT: xxspltib 0, 165
+; P9-BE-NEXT: stfd 0, 15(3)
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B7:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, 2779096485
-; P10-BE-NEXT: rldimi 4, 4, 32, 0
-; P10-BE-NEXT: pstd 4, 15(3), 0
; P10-BE-NEXT: xxspltib 0, 165
+; P10-BE-NEXT: stfd 0, 15(3)
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
; P8-LE-LABEL: memset2TailV1B7:
; P8-LE: # %bb.0: # %entry
; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha
-; P8-LE-NEXT: lis 5, -23131
; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
-; P8-LE-NEXT: ori 4, 5, 42405
-; P8-LE-NEXT: li 5, 15
-; P8-LE-NEXT: rldimi 4, 4, 32, 0
-; P8-LE-NEXT: stdx 4, 3, 5
+; P8-LE-NEXT: stfd 0, 15(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memset2TailV1B7:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, -23131
-; P9-LE-NEXT: li 5, 15
-; P9-LE-NEXT: ori 4, 4, 42405
-; P9-LE-NEXT: rldimi 4, 4, 32, 0
-; P9-LE-NEXT: stdx 4, 3, 5
; P9-LE-NEXT: xxspltib 0, 165
+; P9-LE-NEXT: stfd 0, 15(3)
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B7:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, 2779096485
-; P10-LE-NEXT: rldimi 4, 4, 32, 0
-; P10-LE-NEXT: pstd 4, 15(3), 0
; P10-LE-NEXT: xxspltib 0, 165
+; P10-LE-NEXT: stfd 0, 15(3)
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -818,26 +742,24 @@ define dso_local void @memset2TailV1B4(ptr nocapture noundef writeonly %p) local
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: ld 4, L..C5(2) # %const.0
; P8-BE-NEXT: lxvw4x 0, 0, 4
-; P8-BE-NEXT: lis 4, -23131
-; P8-BE-NEXT: ori 4, 4, 42405
-; P8-BE-NEXT: stw 4, 16(3)
+; P8-BE-NEXT: li 4, 16
+; P8-BE-NEXT: stfiwx 0, 3, 4
; P8-BE-NEXT: stxvw4x 0, 0, 3
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memset2TailV1B4:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, -23131
-; P9-BE-NEXT: ori 4, 4, 42405
-; P9-BE-NEXT: stw 4, 16(3)
; P9-BE-NEXT: xxspltib 0, 165
+; P9-BE-NEXT: li 4, 16
+; P9-BE-NEXT: stfiwx 0, 3, 4
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B4:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, -1515870811
-; P10-BE-NEXT: stw 4, 16(3)
; P10-BE-NEXT: xxspltib 0, 165
+; P10-BE-NEXT: li 4, 16
+; P10-BE-NEXT: stfiwx 0, 3, 4
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -846,26 +768,24 @@ define dso_local void @memset2TailV1B4(ptr nocapture noundef writeonly %p) local
; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha
; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
-; P8-LE-NEXT: lis 4, -23131
-; P8-LE-NEXT: ori 4, 4, 42405
-; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: li 4, 16
+; P8-LE-NEXT: stfiwx 0, 3, 4
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memset2TailV1B4:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, -23131
-; P9-LE-NEXT: ori 4, 4, 42405
-; P9-LE-NEXT: stw 4, 16(3)
; P9-LE-NEXT: xxspltib 0, 165
+; P9-LE-NEXT: li 4, 16
+; P9-LE-NEXT: stfiwx 0, 3, 4
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B4:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, -1515870811
-; P10-LE-NEXT: stw 4, 16(3)
; P10-LE-NEXT: xxspltib 0, 165
+; P10-LE-NEXT: li 4, 16
+; P10-LE-NEXT: stfiwx 0, 3, 4
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -878,26 +798,24 @@ define dso_local void @memset2TailV1B3(ptr nocapture noundef writeonly %p) local
; P8-BE: # %bb.0: # %entry
; P8-BE-NEXT: ld 4, L..C6(2) # %const.0
; P8-BE-NEXT: lxvw4x 0, 0, 4
-; P8-BE-NEXT: lis 4, -23131
-; P8-BE-NEXT: ori 4, 4, 42405
-; P8-BE-NEXT: stw 4, 15(3)
+; P8-BE-NEXT: li 4, 15
+; P8-BE-NEXT: stfiwx 0, 3, 4
; P8-BE-NEXT: stxvw4x 0, 0, 3
; P8-BE-NEXT: blr
;
; P9-BE-LABEL: memset2TailV1B3:
; P9-BE: # %bb.0: # %entry
-; P9-BE-NEXT: lis 4, -23131
-; P9-BE-NEXT: ori 4, 4, 42405
-; P9-BE-NEXT: stw 4, 15(3)
; P9-BE-NEXT: xxspltib 0, 165
+; P9-BE-NEXT: li 4, 15
+; P9-BE-NEXT: stfiwx 0, 3, 4
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B3:
; P10-BE: # %bb.0: # %entry
-; P10-BE-NEXT: pli 4, -1515870811
-; P10-BE-NEXT: stw 4, 15(3)
; P10-BE-NEXT: xxspltib 0, 165
+; P10-BE-NEXT: li 4, 15
+; P10-BE-NEXT: stfiwx 0, 3, 4
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -906,26 +824,24 @@ define dso_local void @memset2TailV1B3(ptr nocapture noundef writeonly %p) local
; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha
; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
-; P8-LE-NEXT: lis 4, -23131
-; P8-LE-NEXT: ori 4, 4, 42405
-; P8-LE-NEXT: stw 4, 15(3)
+; P8-LE-NEXT: li 4, 15
+; P8-LE-NEXT: stfiwx 0, 3, 4
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: memset2TailV1B3:
; P9-LE: # %bb.0: # %entry
-; P9-LE-NEXT: lis 4, -23131
-; P9-LE-NEXT: ori 4, 4, 42405
-; P9-LE-NEXT: stw 4, 15(3)
; P9-LE-NEXT: xxspltib 0, 165
+; P9-LE-NEXT: li 4, 15
+; P9-LE-NEXT: stfiwx 0, 3, 4
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B3:
; P10-LE: # %bb.0: # %entry
-; P10-LE-NEXT: pli 4, -1515870811
-; P10-LE-NEXT: stw 4, 15(3)
; P10-LE-NEXT: xxspltib 0, 165
+; P10-LE-NEXT: li 4, 15
+; P10-LE-NEXT: stfiwx 0, 3, 4
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
More information about the llvm-commits
mailing list