[llvm] r248804 - [AArch64] Scale offsets by the size of the memory operation. NFC.
Chad Rosier via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 29 09:07:33 PDT 2015
Author: mcrosier
Date: Tue Sep 29 11:07:32 2015
New Revision: 248804
URL: http://llvm.org/viewvc/llvm-project?rev=248804&view=rev
Log:
[AArch64] Scale offsets by the size of the memory operation. NFC.
The immediate in the load/store should be scaled by the size of the memory
operation, not the size of the register being loaded/stored. This change gets
us one step closer to forming LDPSW instructions, and it also enables
pre- and post-indexing for halfword and byte loads and stores.
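To make the scaling concrete (an illustrative example, not part of the
commit): LDRSW sign-extends a 4-byte word from memory into a 64-bit X
register, so its scaled immediate encodes multiples of the 4-byte memory
access size, not the 8-byte register size:

  ldrsw x0, [x1, #12]  // encoded immediate is 12/4 = 3, scaled by the
                       // memory size; scaling by the 8-byte register
                       // size would wrongly decode byte offset 24

For most opcodes handled by getMemScale() below the two sizes coincide;
they diverge for sign-extending loads like LDRSW and the LDPSW pairs the
log mentions, which is why the scale must come from the memory operation.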
Modified:
llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=248804&r1=248803&r2=248804&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Tue Sep 29 11:07:32 2015
@@ -164,8 +164,8 @@ static bool isUnscaledLdSt(MachineInstr
return isUnscaledLdSt(MI->getOpcode());
}
-// Size in bytes of the data moved by an unscaled load or store.
-static int getMemSize(MachineInstr *MI) {
+// Scaling factor for unscaled load or store.
+static int getMemScale(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
llvm_unreachable("Opcode has unknown size!");
@@ -179,6 +179,10 @@ static int getMemSize(MachineInstr *MI)
case AArch64::STURSi:
case AArch64::STRWui:
case AArch64::STURWi:
+ case AArch64::LDPSi:
+ case AArch64::LDPWi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
@@ -188,11 +192,17 @@ static int getMemSize(MachineInstr *MI)
case AArch64::STURDi:
case AArch64::STRXui:
case AArch64::STURXi:
+ case AArch64::LDPDi:
+ case AArch64::LDPXi:
+ case AArch64::STPDi:
+ case AArch64::STPXi:
return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
case AArch64::STRQui:
case AArch64::STURQi:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
return 16;
}
}
@@ -425,7 +435,7 @@ AArch64LoadStoreOpt::mergePairedInsns(Ma
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
bool IsUnscaled = isUnscaledLdSt(Opc);
int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
+ IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(I) : 1;
bool MergeForward = Flags.getMergeForward();
unsigned NewOpc = getMatchingPairOpcode(Opc);
@@ -613,7 +623,7 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
+ IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(FirstMI) : 1;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return E;
@@ -774,8 +784,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(Mac
.addImm(Value);
} else {
// Paired instruction.
- const MachineFunction &MF = *I->getParent()->getParent();
- int Scale = TII->getRegClass(I->getDesc(), 0, TRI, MF)->getSize();
+ int Scale = getMemScale(I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.addOperand(getLdStRegOp(Update))
.addOperand(getLdStRegOp(I, 0))
@@ -840,8 +849,7 @@ bool AArch64LoadStoreOpt::isMatchingUpda
// the scaling factor. The scaled offset must also fit into a signed 7-bit
// integer.
if (IsPairedInsn) {
- const MachineFunction &MF = *MemMI->getParent()->getParent();
- int Scale = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ int Scale = getMemScale(MemMI);
if (UpdateOffset % Scale != 0)
break;
@@ -864,11 +872,9 @@ MachineBasicBlock::iterator AArch64LoadS
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
- int Offset = getLdStOffsetOp(MemMI).getImm() *
- TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ int Offset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
// If the base register overlaps a destination register, we can't
// merge the update.
@@ -922,11 +928,10 @@ MachineBasicBlock::iterator AArch64LoadS
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
int Offset = getLdStOffsetOp(MemMI).getImm();
- unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ unsigned MemSize = getMemScale(MemMI);
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
@@ -958,7 +963,7 @@ MachineBasicBlock::iterator AArch64LoadS
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(I, MI, BaseReg, RegSize))
+ if (isMatchingUpdateInsn(I, MI, BaseReg, MemSize))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -1147,9 +1152,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(
// The immediate in the load/store is scaled by the size of the register
// being loaded. The immediate in the add we're looking for,
// however, is not, so adjust here.
- int Value = MI->getOperand(isPairedLdSt(MI) ? 3 : 2).getImm() *
- TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
- ->getSize();
+ int Value =
+ MI->getOperand(isPairedLdSt(MI) ? 3 : 2).getImm() * getMemScale(MI);
// FIXME: The immediate in the load/store should be scaled by the size of
// the memory operation, not the size of the register being loaded/stored.
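For reference, a minimal sketch of what the foreshadowed LDPSW support
could look like in getMemScale() (an assumption about a follow-up patch,
not something this commit adds):

  // Hypothetical follow-up: LDPSW loads two 4-byte words and sign-extends
  // each into a 64-bit X register, so the scale is still the 4-byte
  // memory access size rather than the 8-byte register size.
  case AArch64::LDPSWi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return 4;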