[llvm] r216629 - [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.

Wed Aug 27 16:23:56 PDT 2014

Hi Quentin,

thanks for pointing this out - this is way better.
I reverted it in r216632 and I will use MachineRegisterInfo::clearKillFlags as you suggested.

Cheers,
Juergen

On Aug 27, 2014, at 4:12 PM, Quentin Colombet <qcolombet at apple.com> wrote:

> Hi Juergen,
> 
> I can understand that we may not want to fold something in an addressing mode for performance reasons, but if this is to avoid a use of a kill operand that sound like the wrong approach to me.
> 
> If you extend the live-range of an operand, just make sure to use MachineRegisterInfo::clearKillFlags.
> 
> Cheers,
> -Quentin
> 
> On Aug 27, 2014, at 3:52 PM, Juergen Ributzka <juergen at apple.com> wrote:
> 
>> Author: ributzka
>> Date: Wed Aug 27 17:52:33 2014
>> New Revision: 216629
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=216629&view=rev
>> Log:
>> [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
>> 
>> Currently instructions are folded very aggressively into the memory operation,
>> which can lead to the use of killed operands:
>> %vreg1<def> = ADDXri %vreg0<kill>, 2
>> %vreg2<def> = LDRBBui %vreg0, 2
>> ... = ... %vreg1 ...
>> 
>> This usually happens when the result is also used by another non-memory
>> instruction in the same basic block, or any instruction in another basic block.
>> 
>> If the computed address is used by only memory operations in the same basic
>> block, then it is safe to fold them. This is because all memory operations will
>> fold the address computation and the original computation will never be emitted.
>> 
>> This fixes rdar://problem/18142857.
>> 
>> Modified:
>>   llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
>>   llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
>> 
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216629&r1=216628&r2=216629&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 27 17:52:33 2014
>> @@ -134,7 +134,11 @@ private:
>>  // Utility helper routines.
>>  bool isTypeLegal(Type *Ty, MVT &VT);
>>  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
>> -  bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
>> +  bool isLegalToFoldAddress(const Value *Obj);
>> +  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
>> +  bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
>> +  bool computeAddressBase(const Value *Obj, Address &Addr);
>> +
>>  bool ComputeCallAddress(const Value *V, Address &Addr);
>>  bool SimplifyAddress(Address &Addr, MVT VT);
>>  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
>> @@ -416,9 +420,68 @@ unsigned AArch64FastISel::TargetMaterial
>>  return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
>> }
>> 
>> +bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
>> +  // Look through BitCast, IntToPtr, and PtrToInt.
>> +  const User *U = nullptr;
>> +  unsigned Opcode = Instruction::UserOp1;
>> +  if (const auto *I = dyn_cast<Instruction>(Obj)) {
>> +    // Bail out if the result is used in a different basic block.
>> +    if (FuncInfo.isExportedInst(I))
>> +      return false;
>> +
>> +    Opcode = I->getOpcode();
>> +    U = I;
>> +  } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
>> +    Opcode = CE->getOpcode();
>> +    U = CE;
>> +  }
>> +
>> +  switch (Opcode) {
>> +  default:
>> +    break;
>> +  case Instruction::BitCast:
>> +    return isLegalToFoldAddress(U->getOperand(0));
>> +  case Instruction::IntToPtr:
>> +    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
>> +      return isLegalToFoldAddress(U->getOperand(0));
>> +    break;
>> +  case Instruction::PtrToInt:
>> +    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
>> +      return isLegalToFoldAddress(U->getOperand(0));
>> +    break;
>> +  }
>> +
>> +  // Allocas never kill their operands, so it is safe to fold it.
>> +  if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
>> +    return true;
>> +
>> +  const auto *I = cast<Instruction>(Obj);
>> +  // Trivial case - the memory instruction is the only user.
>> +  if (I->hasOneUse())
>> +    return true;
>> +
>> +  // Check all users - if all of them are memory instructions that FastISel
>> +  // can handle, then it is safe to fold the instruction.
>> +  for (auto *U : I->users())
>> +    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
>> +      return false;
>> +
>> +  return true;
>> +}
>> +
>> // Computes the address to get to an object.
>> -bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
>> -{
>> +bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
>> +                                     Type *Ty) {
>> +  // Don't fold instructions into the memory operation if their result is
>> +  // exported to another basic block or has more than one use - except if all
>> +  // uses are memory operations.
>> +  if (isLegalToFoldAddress(Obj))
>> +    return computeAddressRecursively(Obj, Addr, Ty);
>> +  return computeAddressBase(Obj, Addr);
>> +}
>> +
>> +bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
>> +                                                Type *Ty) {
>>  const User *U = nullptr;
>>  unsigned Opcode = Instruction::UserOp1;
>>  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
>> @@ -445,18 +508,18 @@ bool AArch64FastISel::ComputeAddress(con
>>    break;
>>  case Instruction::BitCast: {
>>    // Look through bitcasts.
>> -    return ComputeAddress(U->getOperand(0), Addr, Ty);
>> +    return computeAddressRecursively(U->getOperand(0), Addr, Ty);
>>  }
>>  case Instruction::IntToPtr: {
>>    // Look past no-op inttoptrs.
>>    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
>> -      return ComputeAddress(U->getOperand(0), Addr, Ty);
>> +      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
>>    break;
>>  }
>>  case Instruction::PtrToInt: {
>> -    // Look past no-op ptrtoints.
>> +    // Look past no-op ptrtoints. Don't increment recursion level.
>>    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
>> -      return ComputeAddress(U->getOperand(0), Addr, Ty);
>> +      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
>>    break;
>>  }
>>  case Instruction::GetElementPtr: {
>> @@ -498,7 +561,7 @@ bool AArch64FastISel::ComputeAddress(con
>> 
>>    // Try to grab the base operand now.
>>    Addr.setOffset(TmpOffset);
>> -    if (ComputeAddress(U->getOperand(0), Addr, Ty))
>> +    if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
>>      return true;
>> 
>>    // We failed, restore everything and try the other options.
>> @@ -519,6 +582,9 @@ bool AArch64FastISel::ComputeAddress(con
>>    break;
>>  }
>>  case Instruction::Add: {
>> +    if (!U->hasOneUse())
>> +      break;
>> +
>>    // Adds of constants are common and easy enough.
>>    const Value *LHS = U->getOperand(0);
>>    const Value *RHS = U->getOperand(1);
>> @@ -528,17 +594,21 @@ bool AArch64FastISel::ComputeAddress(con
>> 
>>    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
>>      Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
>> -      return ComputeAddress(LHS, Addr, Ty);
>> +      return computeAddressRecursively(LHS, Addr, Ty);
>>    }
>> 
>>    Address Backup = Addr;
>> -    if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
>> +    if (computeAddressRecursively(LHS, Addr, Ty) &&
>> +        computeAddressRecursively(RHS, Addr, Ty))
>>      return true;
>>    Addr = Backup;
>> 
>>    break;
>>  }
>>  case Instruction::Shl:
>> +    if (!U->hasOneUse())
>> +      break;
>> +
>>    if (Addr.getOffsetReg())
>>      break;
>> 
>> @@ -561,8 +631,10 @@ bool AArch64FastISel::ComputeAddress(con
>>      Addr.setShift(Val);
>>      Addr.setExtendType(AArch64_AM::LSL);
>> 
>> +      // Only try to fold the operand if it has one use.
>>      if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
>> -        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
>> +        if (I->hasOneUse() &&
>> +            (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
>>          U = I;
>> 
>>      if (const auto *ZE = dyn_cast<ZExtInst>(U))
>> @@ -582,6 +654,10 @@ bool AArch64FastISel::ComputeAddress(con
>>    break;
>>  }
>> 
>> +  return computeAddressBase(Obj, Addr);
>> +}
>> +
>> +bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
>>  if (Addr.getReg()) {
>>    if (!Addr.getOffsetReg()) {
>>      unsigned Reg = getRegForValue(Obj);
>> @@ -1352,7 +1428,7 @@ bool AArch64FastISel::SelectLoad(const I
>> 
>>  // See if we can handle this address.
>>  Address Addr;
>> -  if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
>> +  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
>>    return false;
>> 
>>  unsigned ResultReg;
>> @@ -1469,7 +1545,7 @@ bool AArch64FastISel::SelectStore(const
>> 
>>  // See if we can handle this address.
>>  Address Addr;
>> -  if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
>> +  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
>>    return false;
>> 
>>  if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
>> @@ -2377,7 +2453,7 @@ bool AArch64FastISel::FastLowerIntrinsic
>>    if (MTI->isVolatile())
>>      return false;
>> 
>> -    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
>> +    // Disable inlining for memmove before calls to computeAddress.  Otherwise,
>>    // we would emit dead code because we don't currently handle memmoves.
>>    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
>>    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
>> @@ -2387,8 +2463,8 @@ bool AArch64FastISel::FastLowerIntrinsic
>>      unsigned Alignment = MTI->getAlignment();
>>      if (IsMemCpySmall(Len, Alignment)) {
>>        Address Dest, Src;
>> -        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
>> -            !ComputeAddress(MTI->getRawSource(), Src))
>> +        if (!computeAddress(MTI->getRawDest(), Dest) ||
>> +            !computeAddress(MTI->getRawSource(), Src))
>>          return false;
>>        if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
>>          return true;
>> 
>> Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=216629&r1=216628&r2=216629&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (original)
>> +++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 27 17:52:33 2014
>> @@ -281,6 +281,50 @@ define i64 @load_breg_immoff_8(i64 %a) {
>>  ret i64 %3
>> }
>> 
>> +; Allow folding of the address if it is used by memory instructions only.
>> +define void @load_breg_immoff_9(i64 %a) {
>> +; FAST-LABEL: load_breg_immoff_9
>> +; FAST:       ldr {{x[0-9]+}}, [x0, #96]
>> +; FAST:       str {{x[0-9]+}}, [x0, #96]
>> +  %1 = add i64 %a, 96
>> +  %2 = inttoptr i64 %1 to i64*
>> +  %3 = load i64* %2
>> +  %4 = add i64 %3, 1
>> +  store i64 %4, i64* %2
>> +  ret void
>> +}
>> +
>> +; Don't fold if the add result leaves the basic block - even if the user is a
>> +; memory operation.
>> +define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
>> +; FAST-LABEL: load_breg_immoff_10
>> +; FAST:       add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
>> +  %1 = add i64 %a, 96
>> +  %2 = inttoptr i64 %1 to i64*
>> +  %3 = load i64* %2
>> +  br i1 %c, label %bb1, label %bb2
>> +bb1:
>> +  %4 = shl i64 %1, 3
>> +  %5 = inttoptr i64 %4 to i64*
>> +  %res = load i64* %5
>> +  ret i64 %res
>> +bb2:
>> +  ret i64 %3
>> +}
>> +
>> +; Don't allow folding of the address if it is used by non-memory instructions.
>> +define i64 @load_breg_immoff_11(i64 %a) {
>> +; FAST-LABEL: load_breg_immoff_11
>> +; FAST:       add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
>> +  %1 = add i64 %a, 96
>> +  %2 = inttoptr i64 %1 to i64*
>> +  %3 = load i64* %2
>> +  %4 = add i64 %1, %3
>> +  ret i64 %4
>> +}
>> +
>> ; Load Base Register + Register Offset
>> define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
>> ; CHECK-LABEL: load_breg_offreg_1
>> @@ -301,6 +345,33 @@ define i64 @load_breg_offreg_2(i64 %a, i
>>  ret i64 %3
>> }
>> 
>> +; Don't fold if the add result leaves the basic block.
>> +define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
>> +; FAST-LABEL: load_breg_offreg_3
>> +; FAST:       add [[REG:x[0-9]+]], x0, x1
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
>> +  %1 = add i64 %a, %b
>> +  %2 = inttoptr i64 %1 to i64*
>> +  %3 = load i64* %2
>> +  br i1 %c, label %bb1, label %bb2
>> +bb1:
>> +  %res = load i64* %2
>> +  ret i64 %res
>> +bb2:
>> +  ret i64 %3
>> +}
>> +
>> +define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
>> +; FAST-LABEL: load_breg_offreg_4
>> +; FAST:       add [[REG:x[0-9]+]], x0, x1
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
>> +  %1 = add i64 %a, %b
>> +  %2 = inttoptr i64 %1 to i64*
>> +  %3 = load i64* %2
>> +  %4 = add i64 %1, %3
>> +  ret i64 %4
>> +}
>> +
>> ; Load Base Register + Register Offset + Immediate Offset
>> define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
>> ; CHECK-LABEL: load_breg_offreg_immoff_1
>> @@ -405,6 +476,35 @@ define i32 @load_breg_shift_offreg_5(i64
>>  ret i32 %5
>> }
>> 
>> +; Don't fold if the shift result leaves the basic block.
>> +define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
>> +; FAST-LABEL: load_breg_shift_offreg_6
>> +; FAST:       lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
>> +  %1 = shl i64 %a, 3
>> +  %2 = add i64 %b, %1
>> +  %3 = inttoptr i64 %2 to i64*
>> +  %4 = load i64* %3
>> +  br i1 %c, label %bb1, label %bb2
>> +bb1:
>> +  %5 = inttoptr i64 %1 to i64*
>> +  %res = load i64* %5
>> +  ret i64 %res
>> +bb2:
>> +  ret i64 %4
>> +}
>> +
>> +define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
>> +; FAST-LABEL: load_breg_shift_offreg_7
>> +; FAST:       lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
>> +; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
>> +  %1 = shl i64 %a, 3
>> +  %2 = add i64 %b, %1
>> +  %3 = inttoptr i64 %2 to i64*
>> +  %4 = load i64* %3
>> +  %5 = add i64 %1, %4
>> +  ret i64 %5
>> +}
>> 
>> ; Load Base Register + Scaled Register Offset + Sign/Zero extension
>> define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
>> @@ -429,6 +529,36 @@ define i32 @load_breg_zext_shift_offreg_
>>  ret i32 %5
>> }
>> 
>> +; Don't fold if the zext result leaves the basic block.
>> +define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
>> +; FAST-LABEL: load_breg_zext_shift_offreg_3
>> +; FAST:       ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
>> +  %1 = zext i32 %a to i64
>> +  %2 = shl i64 %1, 3
>> +  %3 = add i64 %b, %2
>> +  %4 = inttoptr i64 %3 to i64*
>> +  %5 = load i64* %4
>> +  br i1 %c, label %bb1, label %bb2
>> +bb1:
>> +  %6 = inttoptr i64 %1 to i64*
>> +  %res = load i64* %6
>> +  ret i64 %res
>> +bb2:
>> +  ret i64 %5
>> +}
>> +
>> +define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
>> +; FAST-LABEL: load_breg_zext_shift_offreg_4
>> +; FAST:       ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
>> +  %1 = zext i32 %a to i64
>> +  %2 = shl i64 %1, 3
>> +  %3 = add i64 %b, %2
>> +  %4 = inttoptr i64 %3 to i64*
>> +  %5 = load i64* %4
>> +  %6 = add i64 %1, %5
>> +  ret i64 %6
>> +}
>> +
>> define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
>> ; CHECK-LABEL: load_breg_sext_shift_offreg_1
>> ; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>