[llvm] r260682 - [AArch64] Merge two adjacent str WZR into str XZR
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 07:25:41 PST 2016
Author: junbuml
Date: Fri Feb 12 09:25:39 2016
New Revision: 260682
URL: http://llvm.org/viewvc/llvm-project?rev=260682&view=rev
Log:
[AArch64] Merge two adjacent str WZR into str XZR
Summary:
This change merges adjacent 32-bit zero stores into a single 64-bit zero store.
e.g.,
str wzr, [x0]
str wzr, [x0, #4]
becomes
str xzr, [x0]
Therefore, four adjacent 32-bit zero stores will be merged into a single stp.
e.g.,
str wzr, [x0]
str wzr, [x0, #4]
str wzr, [x0, #8]
str wzr, [x0, #12]
becomes
stp xzr, xzr, [x0]
Reviewers: mcrosier, jmolloy, gberry, t.p.northover
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D16933
Modified:
llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=260682&r1=260681&r2=260682&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri Feb 12 09:25:39 2016
@@ -235,10 +235,6 @@ static bool isNarrowStore(unsigned Opc)
}
}
-static bool isNarrowStore(MachineInstr *MI) {
- return isNarrowStore(MI->getOpcode());
-}
-
static bool isNarrowLoad(unsigned Opc) {
switch (Opc) {
default:
@@ -386,6 +382,10 @@ static unsigned getMatchingWideOpcode(un
return AArch64::STURHHi;
case AArch64::STURHHi:
return AArch64::STURWi;
+ case AArch64::STURWi:
+ return AArch64::STURXi;
+ case AArch64::STRWui:
+ return AArch64::STRXui;
case AArch64::LDRHHui:
case AArch64::LDRSHWui:
return AArch64::LDRWui;
@@ -640,6 +640,16 @@ static bool isLdOffsetInRangeOfSt(Machin
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
+static bool isPromotableZeroStoreOpcode(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
+}
+
+static bool isPromotableZeroStoreInst(MachineInstr *MI) {
+ return (isPromotableZeroStoreOpcode(MI)) &&
+ getLdStRegOp(MI).getReg() == AArch64::WZR;
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MergeMI,
@@ -775,12 +785,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(Ma
MergeMI->eraseFromParent();
return NextI;
}
- assert(isNarrowStore(Opc) && "Expected narrow store");
+ assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
// Construct the new instruction.
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
- .addOperand(getLdStRegOp(I))
+ .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
.addOperand(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
@@ -1211,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
- bool IsNarrowStore = isNarrowStore(Opc);
+ bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
@@ -1282,7 +1292,7 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
continue;
}
- if (IsNarrowLoad || IsNarrowStore) {
+ if (IsNarrowLoad || IsPromotableZeroStore) {
// If the alignment requirements of the scaled wide load/store
// instruction can't express the offset of the scaled narrow
// input, bail and keep looking.
@@ -1307,7 +1317,7 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
// For narrow stores, allow only when the stored value is the same
// (i.e., WZR).
if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
- (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
+ (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(MI);
continue;
@@ -1633,24 +1643,27 @@ bool AArch64LoadStoreOpt::isCandidateToM
// store.
bool AArch64LoadStoreOpt::tryToMergeLdStInst(
MachineBasicBlock::iterator &MBBI) {
- assert((isNarrowLoad(MBBI) || isNarrowStore(MBBI)) && "Expected narrow op.");
+ assert((isNarrowLoad(MBBI) || isPromotableZeroStoreOpcode(MBBI)) &&
+ "Expected narrow op.");
MachineInstr *MI = MBBI;
MachineBasicBlock::iterator E = MI->getParent()->end();
if (!isCandidateToMergeOrPair(MI))
return false;
- // For narrow stores, find only the case where the stored value is WZR.
- if (isNarrowStore(MI) && getLdStRegOp(MI).getReg() != AArch64::WZR)
+ // For promotable zero stores, the stored value should be WZR.
+ if (isPromotableZeroStoreOpcode(MI) &&
+ getLdStRegOp(MI).getReg() != AArch64::WZR)
return false;
// Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
- MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit);
+ MachineBasicBlock::iterator MergeMI =
+ findMatchingInsn(MBBI, Flags, LdStLimit);
if (MergeMI != E) {
if (isNarrowLoad(MI)) {
++NumNarrowLoadsPromoted;
- } else if (isNarrowStore(MI)) {
+ } else if (isPromotableZeroStoreInst(MI)) {
++NumZeroStoresPromoted;
}
// Keeping the iterator straight is a pain, so we let the merge routine tell
@@ -1765,13 +1778,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(
case AArch64::LDRSHWui:
case AArch64::STRBBui:
case AArch64::STRHHui:
+ case AArch64::STRWui:
// Unscaled instructions.
case AArch64::LDURBBi:
case AArch64::LDURHHi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi:
case AArch64::STURBBi:
- case AArch64::STURHHi: {
+ case AArch64::STURHHi:
+ case AArch64::STURWi: {
if (tryToMergeLdStInst(MBBI)) {
Modified = true;
break;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll?rev=260682&r1=260681&r2=260682&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll Fri Feb 12 09:25:39 2016
@@ -352,6 +352,42 @@ entry:
ret void
}
+;CHECK-LABEL: Strw_zero
+;CHECK: str xzr
+define void @Strw_zero(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 0, i32* %arrayidx2
+ ret void
+}
+
+;CHECK-LABEL: Strw_zero_4
+;CHECK: stp xzr
+define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 0, i32* %arrayidx2
+ %add3 = add nsw i32 %n, 2
+ %idxprom4 = sext i32 %add3 to i64
+ %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
+ store i32 0, i32* %arrayidx5
+ %add6 = add nsw i32 %n, 3
+ %idxprom7 = sext i32 %add6 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
+ store i32 0, i32* %arrayidx8
+ ret void
+}
+
; CHECK-LABEL: Sturb_zero
; CHECK: sturh wzr
define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
@@ -404,3 +440,42 @@ entry:
store i16 0, i16* %arrayidx9
ret void
}
+
+;CHECK-LABEL: Sturw_zero
+;CHECK: stur xzr
+define void @Sturw_zero(i32* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -3
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %sub1 = add nsw i32 %n, -4
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+ store i32 0, i32* %arrayidx3
+ ret void
+}
+
+;CHECK-LABEL: Sturw_zero_4
+;CHECK: stp xzr
+define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -3
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %sub1 = add nsw i32 %n, -4
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+ store i32 0, i32* %arrayidx3
+ %sub4 = add nsw i32 %n, -2
+ %idxprom5 = sext i32 %sub4 to i64
+ %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
+ store i32 0, i32* %arrayidx6
+ %sub7 = add nsw i32 %n, -1
+ %idxprom8 = sext i32 %sub7 to i64
+ %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
+ store i32 0, i32* %arrayidx9
+ ret void
+}
+
More information about the llvm-commits
mailing list