[llvm] r216629 - [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
Juergen Ributzka
juergen at apple.com
Wed Aug 27 15:52:33 PDT 2014
Author: ributzka
Date: Wed Aug 27 17:52:33 2014
New Revision: 216629
URL: http://llvm.org/viewvc/llvm-project?rev=216629&view=rev
Log:
[FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
Currently, instructions are folded very aggressively into the memory operation,
which can lead to the use of killed operands:
%vreg1<def> = ADDXri %vreg0<kill>, 2
%vreg2<def> = LDRBBui %vreg0, 2
... = ... %vreg1 ...
This usually happens when the result is also used by another non-memory
instruction in the same basic block, or by any instruction in another basic
block.
If the computed address is used only by memory operations in the same basic
block, then it is safe to fold it, because every such memory operation will
fold the address computation and the original computation will never be
emitted.
This fixes rdar://problem/18142857.
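To illustrate the rule (a minimal sketch; @fold_ok and @no_fold are
hypothetical functions in the spirit of the new tests added below): in
@fold_ok the add feeds only the load and the store in the same block, so both
memory operations can fold the offset and the ADD is never emitted; in
@no_fold the add result is also consumed by a non-memory instruction, so the
address must be materialized in a register first.

define i64 @fold_ok(i64 %a) {
  %addr = add i64 %a, 8
  %ptr = inttoptr i64 %addr to i64*
  %val = load i64* %ptr            ; offset folds into the load: [x0, #8]
  store i64 %val, i64* %ptr        ; and into the store as well
  ret i64 %val
}

define i64 @no_fold(i64 %a) {
  %addr = add i64 %a, 8
  %ptr = inttoptr i64 %addr to i64*
  %val = load i64* %ptr
  %sum = add i64 %addr, %val       ; non-memory use of %addr: keep the ADD
  ret i64 %sum
}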
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216629&r1=216628&r2=216629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 27 17:52:33 2014
@@ -134,7 +134,11 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool isLegalToFoldAddress(const Value *Obj);
+ bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
+ bool computeAddressBase(const Value *Obj, Address &Addr);
+
bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
@@ -416,9 +420,68 @@ unsigned AArch64FastISel::TargetMaterial
return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
+bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
+ // Look through BitCast, IntToPtr, and PtrToInt.
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const auto *I = dyn_cast<Instruction>(Obj)) {
+ // Bail out if the result is used in a different basic block.
+ if (FuncInfo.isExportedInst(I))
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = CE->getOpcode();
+ U = CE;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ return isLegalToFoldAddress(U->getOperand(0));
+ case Instruction::IntToPtr:
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return isLegalToFoldAddress(U->getOperand(0));
+ break;
+ case Instruction::PtrToInt:
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return isLegalToFoldAddress(U->getOperand(0));
+ break;
+ }
+
+ // Allocas never kill their operands, so it is safe to fold it.
+ if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
+ return true;
+
+ const auto *I = cast<Instruction>(Obj);
+ // Trivial case - the memory instruction is the only user.
+ if (I->hasOneUse())
+ return true;
+
+ // Check all users - if all of them are memory instructions that FastISel
+ // can handle, then it is safe to fold the instruction.
+ for (auto *U : I->users())
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+
+ return true;
+}
+
// Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
-{
+bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
+ Type *Ty) {
+ // Don't fold instructions into the memory operation if their result is
+ // exported to another basic block or has more than one use - except if all
+ // uses are memory operations.
+ if (isLegalToFoldAddress(Obj))
+ return computeAddressRecursively(Obj, Addr, Ty);
+ return computeAddressBase(Obj, Addr);
+}
+
+bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
+ Type *Ty) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -445,18 +508,18 @@ bool AArch64FastISel::ComputeAddress(con
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
- // Look past no-op ptrtoints.
+ // Look past no-op ptrtoints. Don't increment recursion level.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -498,7 +561,7 @@ bool AArch64FastISel::ComputeAddress(con
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (ComputeAddress(U->getOperand(0), Addr, Ty))
+ if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
@@ -519,6 +582,9 @@ bool AArch64FastISel::ComputeAddress(con
break;
}
case Instruction::Add: {
+ if (!U->hasOneUse())
+ break;
+
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
@@ -528,17 +594,21 @@ bool AArch64FastISel::ComputeAddress(con
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
- return ComputeAddress(LHS, Addr, Ty);
+ return computeAddressRecursively(LHS, Addr, Ty);
}
Address Backup = Addr;
- if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
+ if (computeAddressRecursively(LHS, Addr, Ty) &&
+ computeAddressRecursively(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Shl:
+ if (!U->hasOneUse())
+ break;
+
if (Addr.getOffsetReg())
break;
@@ -561,8 +631,10 @@ bool AArch64FastISel::ComputeAddress(con
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
+ // Only try to fold the operand if it has one use.
if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ if (I->hasOneUse() &&
+ (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
@@ -582,6 +654,10 @@ bool AArch64FastISel::ComputeAddress(con
break;
}
+ return computeAddressBase(Obj, Addr);
+}
+
+bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
if (Addr.getReg()) {
if (!Addr.getOffsetReg()) {
unsigned Reg = getRegForValue(Obj);
@@ -1352,7 +1428,7 @@ bool AArch64FastISel::SelectLoad(const I
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
+ if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
unsigned ResultReg;
@@ -1469,7 +1545,7 @@ bool AArch64FastISel::SelectStore(const
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+ if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2377,7 +2453,7 @@ bool AArch64FastISel::FastLowerIntrinsic
if (MTI->isVolatile())
return false;
- // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // Disable inlining for memmove before calls to computeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
@@ -2387,8 +2463,8 @@ bool AArch64FastISel::FastLowerIntrinsic
unsigned Alignment = MTI->getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!ComputeAddress(MTI->getRawDest(), Dest) ||
- !ComputeAddress(MTI->getRawSource(), Src))
+ if (!computeAddress(MTI->getRawDest(), Dest) ||
+ !computeAddress(MTI->getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=216629&r1=216628&r2=216629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 27 17:52:33 2014
@@ -281,6 +281,50 @@ define i64 @load_breg_immoff_8(i64 %a) {
ret i64 %3
}
+; Allow folding of the address if it is used by memory instructions only.
+define void @load_breg_immoff_9(i64 %a) {
+; FAST-LABEL: load_breg_immoff_9
+; FAST: ldr {{x[0-9]+}}, [x0, #96]
+; FAST: str {{x[0-9]+}}, [x0, #96]
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %3, 1
+ store i64 %4, i64* %2
+ ret void
+}
+
+; Don't fold if the add result leaves the basic block - even if the user is a
+; memory operation.
+define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
+; FAST-LABEL: load_breg_immoff_10
+; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %4 = shl i64 %1, 3
+ %5 = inttoptr i64 %4 to i64*
+ %res = load i64* %5
+ ret i64 %res
+bb2:
+ ret i64 %3
+}
+
+; Don't allow folding of the address if it is used by non-memory instructions.
+define i64 @load_breg_immoff_11(i64 %a) {
+; FAST-LABEL: load_breg_immoff_11
+; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
; Load Base Register + Register Offset
define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_1
@@ -301,6 +345,33 @@ define i64 @load_breg_offreg_2(i64 %a, i
ret i64 %3
}
+; Don't fold if the add result leaves the basic block.
+define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_offreg_3
+; FAST: add [[REG:x[0-9]+]], x0, x1
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, %b
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %res = load i64* %2
+ ret i64 %res
+bb2:
+ ret i64 %3
+}
+
+define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_offreg_4
+; FAST: add [[REG:x[0-9]+]], x0, x1
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, %b
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
; Load Base Register + Register Offset + Immediate Offset
define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_immoff_1
@@ -405,6 +476,35 @@ define i32 @load_breg_shift_offreg_5(i64
ret i32 %5
}
+; Don't fold if the shift result leaves the basic block.
+define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_shift_offreg_6
+; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
+ %1 = shl i64 %a, 3
+ %2 = add i64 %b, %1
+ %3 = inttoptr i64 %2 to i64*
+ %4 = load i64* %3
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %5 = inttoptr i64 %1 to i64*
+ %res = load i64* %5
+ ret i64 %res
+bb2:
+ ret i64 %4
+}
+
+define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
+; FAST-LABEL: load_breg_shift_offreg_7
+; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
+ %1 = shl i64 %a, 3
+ %2 = add i64 %b, %1
+ %3 = inttoptr i64 %2 to i64*
+ %4 = load i64* %3
+ %5 = add i64 %1, %4
+ ret i64 %5
+}
; Load Base Register + Scaled Register Offset + Sign/Zero extension
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@@ -429,6 +529,36 @@ define i32 @load_breg_zext_shift_offreg_
ret i32 %5
}
+; Don't fold if the zext result leaves the basic block.
+define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_zext_shift_offreg_3
+; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %6 = inttoptr i64 %1 to i64*
+ %res = load i64* %6
+ ret i64 %res
+bb2:
+ ret i64 %5
+}
+
+define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
+; FAST-LABEL: load_breg_zext_shift_offreg_4
+; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ %6 = add i64 %1, %5
+ ret i64 %6
+}
+
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]