[llvm] r265021 - [AArch64] Handle missing store pair opportunity

Jun Bum Lim via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 31 07:47:25 PDT 2016


Author: junbuml
Date: Thu Mar 31 09:47:24 2016
New Revision: 265021

URL: http://llvm.org/viewvc/llvm-project?rev=265021&view=rev
Log:
[AArch64] Handle missing store pair opportunity

Summary:
This change handles a missed store-pair opportunity in which the first store
instruction stores zero and is followed by a non-zero store. For example, this
change will convert:

  str wzr, [x8]
  str w1, [x8, #4]
into:
  stp wzr, w1, [x8]

Reviewers: jmolloy, t.p.northover, mcrosier

Subscribers: flyingforyou, aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D18570

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=265021&r1=265020&r2=265021&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Thu Mar 31 09:47:24 2016
@@ -99,7 +99,8 @@ struct AArch64LoadStoreOpt : public Mach
   // Return the matching instruction if one is found, else MBB->end().
   MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                                LdStPairFlags &Flags,
-                                               unsigned Limit);
+                                               unsigned Limit,
+                                               bool FindNarrowMerge);
 
   // Scan the instructions looking for a store that writes to the address from
   // which the current load instruction reads. Return true if one is found.
@@ -757,7 +758,8 @@ AArch64LoadStoreOpt::mergeNarrowInsns(Ma
     MergeMI->eraseFromParent();
     return NextI;
   }
-  assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
+  assert(isPromotableZeroStoreInst(I) && isPromotableZeroStoreInst(MergeMI) &&
+         "Expected promotable zero store");
 
   // Construct the new instruction.
   MachineInstrBuilder MIB;
@@ -1181,7 +1183,8 @@ static bool canMergeOpc(unsigned OpcA, u
 /// current instruction into a wider equivalent or a load/store pair.
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
-                                      LdStPairFlags &Flags, unsigned Limit) {
+                                      LdStPairFlags &Flags, unsigned Limit,
+                                      bool FindNarrowMerge) {
   MachineBasicBlock::iterator E = I->getParent()->end();
   MachineBasicBlock::iterator MBBI = I;
   MachineInstr *FirstMI = I;
@@ -1255,26 +1258,26 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
         // safely transform. Similarly, stop if we see a hint to avoid pairs.
         if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
           return E;
-        // If the resultant immediate offset of merging these instructions
-        // is out of range for a pairwise instruction, bail and keep looking.
-        bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
-        if (!IsNarrowLoad &&
-            !inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
-          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
-          MemInsns.push_back(MI);
-          continue;
-        }
 
-        if (IsNarrowLoad || IsPromotableZeroStore) {
+        if (FindNarrowMerge) {
           // If the alignment requirements of the scaled wide load/store
-          // instruction can't express the offset of the scaled narrow
-          // input, bail and keep looking.
-          if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
+          // instruction can't express the offset of the scaled narrow input,
+          // bail and keep looking. For promotable zero stores, allow only when
+          // the stored value is the same (i.e., WZR).
+          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
+              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
             trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
             MemInsns.push_back(MI);
             continue;
           }
         } else {
+          // If the resultant immediate offset of merging these instructions
+          // is out of range for a pairwise instruction, bail and keep looking.
+          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
+            trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+            MemInsns.push_back(MI);
+            continue;
+          }
           // If the alignment requirements of the paired (scaled) instruction
           // can't express the offset of the unscaled input, bail and keep
           // looking.
@@ -1287,10 +1290,7 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
         // If the destination register of the loads is the same register, bail
         // and keep looking. A load-pair instruction with both destination
         // registers the same is UNPREDICTABLE and will result in an exception.
-        // For narrow stores, allow only when the stored value is the same
-        // (i.e., WZR).
-        if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
-            (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
+        if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
           trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
           MemInsns.push_back(MI);
           continue;
@@ -1609,7 +1609,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdSt
   // Look ahead up to LdStLimit instructions for a mergable instruction.
   LdStPairFlags Flags;
   MachineBasicBlock::iterator MergeMI =
-      findMatchingInsn(MBBI, Flags, LdStLimit);
+      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
   if (MergeMI != E) {
     if (isNarrowLoad(MI)) {
       ++NumNarrowLoadsPromoted;
@@ -1644,7 +1644,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStI
 
   // Look ahead up to LdStLimit instructions for a pairable instruction.
   LdStPairFlags Flags;
-  MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, LdStLimit);
+  MachineBasicBlock::iterator Paired =
+      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
   if (Paired != E) {
     ++NumPairCreated;
     if (TII->isUnscaledLdSt(MI))

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll?rev=265021&r1=265020&r2=265021&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll Thu Mar 31 09:47:24 2016
@@ -353,8 +353,8 @@ entry:
   ret void
 }
 
-;CHECK-LABEL: Strw_zero
-;CHECK : str xzr
+; CHECK-LABEL: Strw_zero
+; CHECK: str xzr
 define void @Strw_zero(i32* nocapture %P, i32 %n) {
 entry:
   %idxprom = sext i32 %n to i64
@@ -367,8 +367,22 @@ entry:
   ret void
 }
 
-;CHECK-LABEL: Strw_zero_4
-;CHECK : stp xzr
+; CHECK-LABEL: Strw_zero_nonzero
+; CHECK: stp wzr, w1
+define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n)  {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 %n, i32* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Strw_zero_4
+; CHECK: stp xzr
 define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
 entry:
   %idxprom = sext i32 %n to i64
@@ -442,8 +456,8 @@ entry:
   ret void
 }
 
-;CHECK-LABEL: Sturw_zero
-;CHECK : stur xzr
+; CHECK-LABEL: Sturw_zero
+; CHECK: stur xzr
 define void @Sturw_zero(i32* nocapture %P, i32 %n) {
 entry:
   %sub = add nsw i32 %n, -3
@@ -457,8 +471,8 @@ entry:
   ret void
 }
 
-;CHECK-LABEL: Sturw_zero_4
-;CHECK : str xzr
+; CHECK-LABEL: Sturw_zero_4
+; CHECK: stp xzr, xzr
 define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
 entry:
   %sub = add nsw i32 %n, -3




More information about the llvm-commits mailing list