[llvm] r216629 - [FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
Juergen Ributzka
juergen at apple.com
Wed Aug 27 15:52:33 PDT 2014
Author: ributzka
Date: Wed Aug 27 17:52:33 2014
New Revision: 216629
URL: http://llvm.org/viewvc/llvm-project?rev=216629&view=rev
Log:
[FastISel][AArch64] Don't fold instructions too aggressively into the memory operation.
Currently, instructions are folded very aggressively into the memory operation,
which can lead to the use of killed operands:
%vreg1<def> = ADDXri %vreg0<kill>, 2
%vreg2<def> = LDRBBui %vreg0, 2
... = ... %vreg1 ...
This usually happens when the result is also used by another non-memory
instruction in the same basic block, or by any instruction in another basic
block.
If the computed address is used only by memory operations in the same basic
block, then it is safe to fold it, because every such memory operation will
fold the address computation and the original computation will never be
emitted.
This fixes rdar://problem/18142857.
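To illustrate the rule (a minimal sketch; @fold_ok and @no_fold are
hypothetical functions in the spirit of the new tests added below): in
@fold_ok the add feeds only the load and the store in the same block, so both
memory operations can fold the offset and the ADD is never emitted; in
@no_fold the add result is also consumed by a non-memory instruction, so the
address must be materialized in a register first.

define i64 @fold_ok(i64 %a) {
  %addr = add i64 %a, 8
  %ptr = inttoptr i64 %addr to i64*
  %val = load i64* %ptr            ; offset folds into the load: [x0, #8]
  store i64 %val, i64* %ptr        ; and into the store as well
  ret i64 %val
}

define i64 @no_fold(i64 %a) {
  %addr = add i64 %a, 8
  %ptr = inttoptr i64 %addr to i64*
  %val = load i64* %ptr
  %sum = add i64 %addr, %val       ; non-memory use of %addr: keep the ADD
  ret i64 %sum
}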
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216629&r1=216628&r2=216629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 27 17:52:33 2014
@@ -134,7 +134,11 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool isLegalToFoldAddress(const Value *Obj);
+ bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
+ bool computeAddressBase(const Value *Obj, Address &Addr);
+
bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
@@ -416,9 +420,68 @@ unsigned AArch64FastISel::TargetMaterial
return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
+bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
+ // Look through BitCast, IntToPtr, and PtrToInt.
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const auto *I = dyn_cast<Instruction>(Obj)) {
+ // Bail out if the result is used in a different basic block.
+ if (FuncInfo.isExportedInst(I))
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = CE->getOpcode();
+ U = CE;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ return isLegalToFoldAddress(U->getOperand(0));
+ case Instruction::IntToPtr:
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return isLegalToFoldAddress(U->getOperand(0));
+ break;
+ case Instruction::PtrToInt:
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return isLegalToFoldAddress(U->getOperand(0));
+ break;
+ }
+
+ // Allocas never kill their operands, so it is safe to fold it.
+ if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
+ return true;
+
+ const auto *I = cast<Instruction>(Obj);
+ // Trivial case - the memory instruction is the only user.
+ if (I->hasOneUse())
+ return true;
+
+ // Check all users - if all of them are memory instructions that FastISel
+ // can handle, then it is safe to fold the instruction.
+ for (auto *U : I->users())
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+
+ return true;
+}
+
// Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
-{
+bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
+ Type *Ty) {
+ // Don't fold instructions into the memory operation if their result is
+ // exported to another basic block or has more than one use - except if all
+ // uses are memory operations.
+ if (isLegalToFoldAddress(Obj))
+ return computeAddressRecursively(Obj, Addr, Ty);
+ return computeAddressBase(Obj, Addr);
+}
+
+bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
+ Type *Ty) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -445,18 +508,18 @@ bool AArch64FastISel::ComputeAddress(con
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
- // Look past no-op ptrtoints.
+ // Look past no-op ptrtoints. Don't increment recursion level.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr, Ty);
+ return computeAddressRecursively(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -498,7 +561,7 @@ bool AArch64FastISel::ComputeAddress(con
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (ComputeAddress(U->getOperand(0), Addr, Ty))
+ if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
@@ -519,6 +582,9 @@ bool AArch64FastISel::ComputeAddress(con
break;
}
case Instruction::Add: {
+ if (!U->hasOneUse())
+ break;
+
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
@@ -528,17 +594,21 @@ bool AArch64FastISel::ComputeAddress(con
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
- return ComputeAddress(LHS, Addr, Ty);
+ return computeAddressRecursively(LHS, Addr, Ty);
}
Address Backup = Addr;
- if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
+ if (computeAddressRecursively(LHS, Addr, Ty) &&
+ computeAddressRecursively(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Shl:
+ if (!U->hasOneUse())
+ break;
+
if (Addr.getOffsetReg())
break;
@@ -561,8 +631,10 @@ bool AArch64FastISel::ComputeAddress(con
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
+ // Only try to fold the operand if it has one use.
if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ if (I->hasOneUse() &&
+ (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
@@ -582,6 +654,10 @@ bool AArch64FastISel::ComputeAddress(con
break;
}
+ return computeAddressBase(Obj, Addr);
+}
+
+bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
if (Addr.getReg()) {
if (!Addr.getOffsetReg()) {
unsigned Reg = getRegForValue(Obj);
@@ -1352,7 +1428,7 @@ bool AArch64FastISel::SelectLoad(const I
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
+ if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
unsigned ResultReg;
@@ -1469,7 +1545,7 @@ bool AArch64FastISel::SelectStore(const
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+ if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2377,7 +2453,7 @@ bool AArch64FastISel::FastLowerIntrinsic
if (MTI->isVolatile())
return false;
- // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // Disable inlining for memmove before calls to computeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
@@ -2387,8 +2463,8 @@ bool AArch64FastISel::FastLowerIntrinsic
unsigned Alignment = MTI->getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!ComputeAddress(MTI->getRawDest(), Dest) ||
- !ComputeAddress(MTI->getRawSource(), Src))
+ if (!computeAddress(MTI->getRawDest(), Dest) ||
+ !computeAddress(MTI->getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=216629&r1=216628&r2=216629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 27 17:52:33 2014
@@ -281,6 +281,50 @@ define i64 @load_breg_immoff_8(i64 %a) {
ret i64 %3
}
+; Allow folding of the address if it is used by memory instructions only.
+define void @load_breg_immoff_9(i64 %a) {
+; FAST-LABEL: load_breg_immoff_9
+; FAST: ldr {{x[0-9]+}}, [x0, #96]
+; FAST: str {{x[0-9]+}}, [x0, #96]
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %3, 1
+ store i64 %4, i64* %2
+ ret void
+}
+
+; Don't fold if the add result leaves the basic block - even if the user is a
+; memory operation.
+define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
+; FAST-LABEL: load_breg_immoff_10
+; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %4 = shl i64 %1, 3
+ %5 = inttoptr i64 %4 to i64*
+ %res = load i64* %5
+ ret i64 %res
+bb2:
+ ret i64 %3
+}
+
+; Don't allow folding of the address if it is used by non-memory instructions.
+define i64 @load_breg_immoff_11(i64 %a) {
+; FAST-LABEL: load_breg_immoff_11
+; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, 96
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
; Load Base Register + Register Offset
define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_1
@@ -301,6 +345,33 @@ define i64 @load_breg_offreg_2(i64 %a, i
ret i64 %3
}
+; Don't fold if the add result leaves the basic block.
+define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_offreg_3
+; FAST: add [[REG:x[0-9]+]], x0, x1
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, %b
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %res = load i64* %2
+ ret i64 %res
+bb2:
+ ret i64 %3
+}
+
+define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_offreg_4
+; FAST: add [[REG:x[0-9]+]], x0, x1
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
+ %1 = add i64 %a, %b
+ %2 = inttoptr i64 %1 to i64*
+ %3 = load i64* %2
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
; Load Base Register + Register Offset + Immediate Offset
define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_immoff_1
@@ -405,6 +476,35 @@ define i32 @load_breg_shift_offreg_5(i64
ret i32 %5
}
+; Don't fold if the shift result leaves the basic block.
+define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_shift_offreg_6
+; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
+ %1 = shl i64 %a, 3
+ %2 = add i64 %b, %1
+ %3 = inttoptr i64 %2 to i64*
+ %4 = load i64* %3
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %5 = inttoptr i64 %1 to i64*
+ %res = load i64* %5
+ ret i64 %res
+bb2:
+ ret i64 %4
+}
+
+define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
+; FAST-LABEL: load_breg_shift_offreg_7
+; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
+ %1 = shl i64 %a, 3
+ %2 = add i64 %b, %1
+ %3 = inttoptr i64 %2 to i64*
+ %4 = load i64* %3
+ %5 = add i64 %1, %4
+ ret i64 %5
+}
; Load Base Register + Scaled Register Offset + Sign/Zero extension
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@@ -429,6 +529,36 @@ define i32 @load_breg_zext_shift_offreg_
ret i32 %5
}
+; Don't fold if the zext result leaves the basic block.
+define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
+; FAST-LABEL: load_breg_zext_shift_offreg_3
+; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ %6 = inttoptr i64 %1 to i64*
+ %res = load i64* %6
+ ret i64 %res
+bb2:
+ ret i64 %5
+}
+
+define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
+; FAST-LABEL: load_breg_zext_shift_offreg_4
+; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ %6 = add i64 %1, %5
+ ret i64 %6
+}
+
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]