[llvm] r216629 - [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
Quentin Colombet
qcolombet at apple.com
Wed Aug 27 16:12:14 PDT 2014
Hi Juergen,
I can understand that we may not want to fold something into an addressing mode for performance reasons, but if this is only to avoid using a killed operand, that sounds like the wrong approach to me.
If you extend the live range of an operand, just make sure to call MachineRegisterInfo::clearKillFlags on it.
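I.e. when the fold reuses a register whose original use carried the kill flag, something along these lines (untested sketch, surrounding names assumed, not from the patch):

  unsigned Reg = getRegForValue(U->getOperand(0));
  if (!Reg)
    return false;
  Addr.setReg(Reg);
  // Folding the computation extends the live range of Reg beyond the
  // instruction that used to kill it, so drop the stale kill flags on
  // all of its uses instead of giving up on the fold.
  MRI.clearKillFlags(Reg);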
Cheers,
-Quentin
On Aug 27, 2014, at 3:52 PM, Juergen Ributzka <juergen at apple.com> wrote:
> Author: ributzka
> Date: Wed Aug 27 17:52:33 2014
> New Revision: 216629
>
> URL: http://llvm.org/viewvc/llvm-project?rev=216629&view=rev
> Log:
> [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
>
> Currently instructions are folded very aggressively into the memory operation,
> which can lead to the use of killed operands:
> %vreg1<def> = ADDXri %vreg0<kill>, 2
> %vreg2<def> = LDRBBui %vreg0, 2
> ... = ... %vreg1 ...
>
> This usually happens when the result is also used by another non-memory
> instruction in the same basic block, or any instruction in another basic block.
>
> If the computed address is used by only memory operations in the same basic
> block, then it is safe to fold them. This is because all memory operations will
> fold the address computation and the original computation will never be emitted.
>
> This fixes rdar://problem/18142857.
>
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
> llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216629&r1=216628&r2=216629&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 27 17:52:33 2014
> @@ -134,7 +134,11 @@ private:
> // Utility helper routines.
> bool isTypeLegal(Type *Ty, MVT &VT);
> bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
> - bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
> + bool isLegalToFoldAddress(const Value *Obj);
> + bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
> + bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
> + bool computeAddressBase(const Value *Obj, Address &Addr);
> +
> bool ComputeCallAddress(const Value *V, Address &Addr);
> bool SimplifyAddress(Address &Addr, MVT VT);
> void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
> @@ -416,9 +420,68 @@ unsigned AArch64FastISel::TargetMaterial
> return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
> }
>
> +bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
> + // Look through BitCast, IntToPtr, and PtrToInt.
> + const User *U = nullptr;
> + unsigned Opcode = Instruction::UserOp1;
> + if (const auto *I = dyn_cast<Instruction>(Obj)) {
> + // Bail out if the result is used in a different basic block.
> + if (FuncInfo.isExportedInst(I))
> + return false;
> +
> + Opcode = I->getOpcode();
> + U = I;
> + } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
> + Opcode = CE->getOpcode();
> + U = CE;
> + }
> +
> + switch (Opcode) {
> + default:
> + break;
> + case Instruction::BitCast:
> + return isLegalToFoldAddress(U->getOperand(0));
> + case Instruction::IntToPtr:
> + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
> + return isLegalToFoldAddress(U->getOperand(0));
> + break;
> + case Instruction::PtrToInt:
> + if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
> + return isLegalToFoldAddress(U->getOperand(0));
> + break;
> + }
> +
> + // Allocas never kill their operands, so it is safe to fold it.
> + if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
> + return true;
> +
> + const auto *I = cast<Instruction>(Obj);
> + // Trivial case - the memory instruction is the only user.
> + if (I->hasOneUse())
> + return true;
> +
> + // Check all users - if all of them are memory instructions that FastISel
> + // can handle, then it is safe to fold the instruction.
> + for (auto *U : I->users())
> + if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
> + return false;
> +
> + return true;
> +}
> +
> // Computes the address to get to an object.
> -bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
> -{
> +bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
> + Type *Ty) {
> + // Don't fold instructions into the memory operation if their result is
> + // exported to another basic block or has more than one use - except if all
> + // uses are memory operations.
> + if (isLegalToFoldAddress(Obj))
> + return computeAddressRecursively(Obj, Addr, Ty);
> + return computeAddressBase(Obj, Addr);
> +}
> +
> +bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
> + Type *Ty) {
> const User *U = nullptr;
> unsigned Opcode = Instruction::UserOp1;
> if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
> @@ -445,18 +508,18 @@ bool AArch64FastISel::ComputeAddress(con
> break;
> case Instruction::BitCast: {
> // Look through bitcasts.
> - return ComputeAddress(U->getOperand(0), Addr, Ty);
> + return computeAddressRecursively(U->getOperand(0), Addr, Ty);
> }
> case Instruction::IntToPtr: {
> // Look past no-op inttoptrs.
> if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
> - return ComputeAddress(U->getOperand(0), Addr, Ty);
> + return computeAddressRecursively(U->getOperand(0), Addr, Ty);
> break;
> }
> case Instruction::PtrToInt: {
> - // Look past no-op ptrtoints.
> + // Look past no-op ptrtoints. Don't increment recursion level.
> if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
> - return ComputeAddress(U->getOperand(0), Addr, Ty);
> + return computeAddressRecursively(U->getOperand(0), Addr, Ty);
> break;
> }
> case Instruction::GetElementPtr: {
> @@ -498,7 +561,7 @@ bool AArch64FastISel::ComputeAddress(con
>
> // Try to grab the base operand now.
> Addr.setOffset(TmpOffset);
> - if (ComputeAddress(U->getOperand(0), Addr, Ty))
> + if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
> return true;
>
> // We failed, restore everything and try the other options.
> @@ -519,6 +582,9 @@ bool AArch64FastISel::ComputeAddress(con
> break;
> }
> case Instruction::Add: {
> + if (!U->hasOneUse())
> + break;
> +
> // Adds of constants are common and easy enough.
> const Value *LHS = U->getOperand(0);
> const Value *RHS = U->getOperand(1);
> @@ -528,17 +594,21 @@ bool AArch64FastISel::ComputeAddress(con
>
> if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
> Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
> - return ComputeAddress(LHS, Addr, Ty);
> + return computeAddressRecursively(LHS, Addr, Ty);
> }
>
> Address Backup = Addr;
> - if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
> + if (computeAddressRecursively(LHS, Addr, Ty) &&
> + computeAddressRecursively(RHS, Addr, Ty))
> return true;
> Addr = Backup;
>
> break;
> }
> case Instruction::Shl:
> + if (!U->hasOneUse())
> + break;
> +
> if (Addr.getOffsetReg())
> break;
>
> @@ -561,8 +631,10 @@ bool AArch64FastISel::ComputeAddress(con
> Addr.setShift(Val);
> Addr.setExtendType(AArch64_AM::LSL);
>
> + // Only try to fold the operand if it has one use.
> if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
> - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
> + if (I->hasOneUse() &&
> + (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
> U = I;
>
> if (const auto *ZE = dyn_cast<ZExtInst>(U))
> @@ -582,6 +654,10 @@ bool AArch64FastISel::ComputeAddress(con
> break;
> }
>
> + return computeAddressBase(Obj, Addr);
> +}
> +
> +bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
> if (Addr.getReg()) {
> if (!Addr.getOffsetReg()) {
> unsigned Reg = getRegForValue(Obj);
> @@ -1352,7 +1428,7 @@ bool AArch64FastISel::SelectLoad(const I
>
> // See if we can handle this address.
> Address Addr;
> - if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
> + if (!computeAddress(I->getOperand(0), Addr, I->getType()))
> return false;
>
> unsigned ResultReg;
> @@ -1469,7 +1545,7 @@ bool AArch64FastISel::SelectStore(const
>
> // See if we can handle this address.
> Address Addr;
> - if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
> + if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
> return false;
>
> if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
> @@ -2377,7 +2453,7 @@ bool AArch64FastISel::FastLowerIntrinsic
> if (MTI->isVolatile())
> return false;
>
> - // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
> + // Disable inlining for memmove before calls to computeAddress. Otherwise,
> // we would emit dead code because we don't currently handle memmoves.
> bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
> if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
> @@ -2387,8 +2463,8 @@ bool AArch64FastISel::FastLowerIntrinsic
> unsigned Alignment = MTI->getAlignment();
> if (IsMemCpySmall(Len, Alignment)) {
> Address Dest, Src;
> - if (!ComputeAddress(MTI->getRawDest(), Dest) ||
> - !ComputeAddress(MTI->getRawSource(), Src))
> + if (!computeAddress(MTI->getRawDest(), Dest) ||
> + !computeAddress(MTI->getRawSource(), Src))
> return false;
> if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
> return true;
>
> Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=216629&r1=216628&r2=216629&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 27 17:52:33 2014
> @@ -281,6 +281,50 @@ define i64 @load_breg_immoff_8(i64 %a) {
> ret i64 %3
> }
>
> +; Allow folding of the address if it is used by memory instructions only.
> +define void @load_breg_immoff_9(i64 %a) {
> +; FAST-LABEL: load_breg_immoff_9
> +; FAST: ldr {{x[0-9]+}}, [x0, #96]
> +; FAST: str {{x[0-9]+}}, [x0, #96]
> + %1 = add i64 %a, 96
> + %2 = inttoptr i64 %1 to i64*
> + %3 = load i64* %2
> + %4 = add i64 %3, 1
> + store i64 %4, i64* %2
> + ret void
> +}
> +
> +; Don't fold if the add result leaves the basic block - even if the user is a
> +; memory operation.
> +define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
> +; FAST-LABEL: load_breg_immoff_10
> +; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
> + %1 = add i64 %a, 96
> + %2 = inttoptr i64 %1 to i64*
> + %3 = load i64* %2
> + br i1 %c, label %bb1, label %bb2
> +bb1:
> + %4 = shl i64 %1, 3
> + %5 = inttoptr i64 %4 to i64*
> + %res = load i64* %5
> + ret i64 %res
> +bb2:
> + ret i64 %3
> +}
> +
> +; Don't allow folding of the address if it is used by non-memory instructions.
> +define i64 @load_breg_immoff_11(i64 %a) {
> +; FAST-LABEL: load_breg_immoff_11
> +; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
> + %1 = add i64 %a, 96
> + %2 = inttoptr i64 %1 to i64*
> + %3 = load i64* %2
> + %4 = add i64 %1, %3
> + ret i64 %4
> +}
> +
> ; Load Base Register + Register Offset
> define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
> ; CHECK-LABEL: load_breg_offreg_1
> @@ -301,6 +345,33 @@ define i64 @load_breg_offreg_2(i64 %a, i
> ret i64 %3
> }
>
> +; Don't fold if the add result leaves the basic block.
> +define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
> +; FAST-LABEL: load_breg_offreg_3
> +; FAST: add [[REG:x[0-9]+]], x0, x1
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
> + %1 = add i64 %a, %b
> + %2 = inttoptr i64 %1 to i64*
> + %3 = load i64* %2
> + br i1 %c, label %bb1, label %bb2
> +bb1:
> + %res = load i64* %2
> + ret i64 %res
> +bb2:
> + ret i64 %3
> +}
> +
> +define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
> +; FAST-LABEL: load_breg_offreg_4
> +; FAST: add [[REG:x[0-9]+]], x0, x1
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
> + %1 = add i64 %a, %b
> + %2 = inttoptr i64 %1 to i64*
> + %3 = load i64* %2
> + %4 = add i64 %1, %3
> + ret i64 %4
> +}
> +
> ; Load Base Register + Register Offset + Immediate Offset
> define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
> ; CHECK-LABEL: load_breg_offreg_immoff_1
> @@ -405,6 +476,35 @@ define i32 @load_breg_shift_offreg_5(i64
> ret i32 %5
> }
>
> +; Don't fold if the shift result leaves the basic block.
> +define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
> +; FAST-LABEL: load_breg_shift_offreg_6
> +; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
> + %1 = shl i64 %a, 3
> + %2 = add i64 %b, %1
> + %3 = inttoptr i64 %2 to i64*
> + %4 = load i64* %3
> + br i1 %c, label %bb1, label %bb2
> +bb1:
> + %5 = inttoptr i64 %1 to i64*
> + %res = load i64* %5
> + ret i64 %res
> +bb2:
> + ret i64 %4
> +}
> +
> +define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
> +; FAST-LABEL: load_breg_shift_offreg_7
> +; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
> +; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
> + %1 = shl i64 %a, 3
> + %2 = add i64 %b, %1
> + %3 = inttoptr i64 %2 to i64*
> + %4 = load i64* %3
> + %5 = add i64 %1, %4
> + ret i64 %5
> +}
>
> ; Load Base Register + Scaled Register Offset + Sign/Zero extension
> define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
> @@ -429,6 +529,36 @@ define i32 @load_breg_zext_shift_offreg_
> ret i32 %5
> }
>
> +; Don't fold if the zext result leaves the basic block.
> +define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
> +; FAST-LABEL: load_breg_zext_shift_offreg_3
> +; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
> + %1 = zext i32 %a to i64
> + %2 = shl i64 %1, 3
> + %3 = add i64 %b, %2
> + %4 = inttoptr i64 %3 to i64*
> + %5 = load i64* %4
> + br i1 %c, label %bb1, label %bb2
> +bb1:
> + %6 = inttoptr i64 %1 to i64*
> + %res = load i64* %6
> + ret i64 %res
> +bb2:
> + ret i64 %5
> +}
> +
> +define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
> +; FAST-LABEL: load_breg_zext_shift_offreg_4
> +; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
> + %1 = zext i32 %a to i64
> + %2 = shl i64 %1, 3
> + %3 = add i64 %b, %2
> + %4 = inttoptr i64 %3 to i64*
> + %5 = load i64* %4
> + %6 = add i64 %1, %5
> + ret i64 %6
> +}
> +
> define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
> ; CHECK-LABEL: load_breg_sext_shift_offreg_1
> ; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
>
>