[llvm] r216632 - Revert "[FastISel][AArch64] Don't fold instructions too aggressively into the memory operation."
Juergen Ributzka
juergen at apple.com
Wed Aug 27 16:09:40 PDT 2014
Author: ributzka
Date: Wed Aug 27 18:09:40 2014
New Revision: 216632
URL: http://llvm.org/viewvc/llvm-project?rev=216632&view=rev
Log:
Revert "[FastISel][AArch64] Don't fold instructions too aggressively into the memory operation."
Quentin pointed out that this is not the correct approach; there is a better and easier solution.
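For readers unfamiliar with the change being reverted: "folding into the memory operation" means letting FastISel absorb address arithmetic (adds, shifts, zero/sign extends) directly into a load/store addressing mode instead of materializing the address in a separate register. A minimal sketch in 2014-era typed-pointer IR (the function name is illustrative, not from this commit):

define i64 @fold_sketch(i64 %a) {
  ; FastISel can select this as a single "ldr x0, [x0, #96]" by folding
  ; the add + inttoptr into the load's base-register + immediate-offset form.
  %1 = add i64 %a, 96
  %2 = inttoptr i64 %1 to i64*
  %3 = load i64* %2
  ret i64 %3
}

The reverted patch only allowed such folds when the intermediate result stayed in one basic block and all of its users were memory operations; this revert restores the unconditional folding until the better fix lands.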
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216632&r1=216631&r2=216632&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 27 18:09:40 2014
@@ -134,11 +134,7 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool isLegalToFoldAddress(const Value *Obj);
- bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
- bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
- bool computeAddressBase(const Value *Obj, Address &Addr);
-
+ bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
@@ -420,68 +416,9 @@ unsigned AArch64FastISel::TargetMaterial
return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
-bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
- // Look through BitCast, IntToPtr, and PtrToInt.
- const User *U = nullptr;
- unsigned Opcode = Instruction::UserOp1;
- if (const auto *I = dyn_cast<Instruction>(Obj)) {
- // Bail out if the result is used in a different basic block.
- if (FuncInfo.isExportedInst(I))
- return false;
-
- Opcode = I->getOpcode();
- U = I;
- } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
- Opcode = CE->getOpcode();
- U = CE;
- }
-
- switch (Opcode) {
- default:
- break;
- case Instruction::BitCast:
- return isLegalToFoldAddress(U->getOperand(0));
- case Instruction::IntToPtr:
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return isLegalToFoldAddress(U->getOperand(0));
- break;
- case Instruction::PtrToInt:
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return isLegalToFoldAddress(U->getOperand(0));
- break;
- }
-
- // Allocas never kill their operands, so it is safe to fold it.
- if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
- return true;
-
- const auto *I = cast<Instruction>(Obj);
- // Trivial case - the memory instruction is the only user.
- if (I->hasOneUse())
- return true;
-
- // Check all users - if all of them are memory instructions that FastISel
- // can handle, then it is safe to fold the instruction.
- for (auto *U : I->users())
- if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
- return false;
-
- return true;
-}
-
// Computes the address to get to an object.
-bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
- Type *Ty) {
- // Don't fold instructions into the memory operation if their result is
- // exported to another basic block or has more than one use - except if all
- // uses are memory operations.
- if (isLegalToFoldAddress(Obj))
- return computeAddressRecursively(Obj, Addr, Ty);
- return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
- Type *Ty) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -508,18 +445,18 @@ bool AArch64FastISel::computeAddressRecu
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
- // Look past no-op ptrtoints. Don't increment recursion level.
+ // Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -561,7 +498,7 @@ bool AArch64FastISel::computeAddressRecu
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
+ if (ComputeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
@@ -582,9 +519,6 @@ bool AArch64FastISel::computeAddressRecu
break;
}
case Instruction::Add: {
- if (!U->hasOneUse())
- break;
-
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
@@ -594,21 +528,17 @@ bool AArch64FastISel::computeAddressRecu
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
- return computeAddressRecursively(LHS, Addr, Ty);
+ return ComputeAddress(LHS, Addr, Ty);
}
Address Backup = Addr;
- if (computeAddressRecursively(LHS, Addr, Ty) &&
- computeAddressRecursively(RHS, Addr, Ty))
+ if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Shl:
- if (!U->hasOneUse())
- break;
-
if (Addr.getOffsetReg())
break;
@@ -631,10 +561,8 @@ bool AArch64FastISel::computeAddressRecu
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
- // Only try to fold the operand if it has one use.
if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
- if (I->hasOneUse() &&
- (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
@@ -654,10 +582,6 @@ bool AArch64FastISel::computeAddressRecu
break;
}
- return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
if (Addr.getReg()) {
if (!Addr.getOffsetReg()) {
unsigned Reg = getRegForValue(Obj);
@@ -1428,7 +1352,7 @@ bool AArch64FastISel::SelectLoad(const I
// See if we can handle this address.
Address Addr;
- if (!computeAddress(I->getOperand(0), Addr, I->getType()))
+ if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
return false;
unsigned ResultReg;
@@ -1545,7 +1469,7 @@ bool AArch64FastISel::SelectStore(const
// See if we can handle this address.
Address Addr;
- if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+ if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2453,7 +2377,7 @@ bool AArch64FastISel::FastLowerIntrinsic
if (MTI->isVolatile())
return false;
- // Disable inlining for memmove before calls to computeAddress. Otherwise,
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
@@ -2463,8 +2387,8 @@ bool AArch64FastISel::FastLowerIntrinsic
unsigned Alignment = MTI->getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!computeAddress(MTI->getRawDest(), Dest) ||
- !computeAddress(MTI->getRawSource(), Src))
+ if (!ComputeAddress(MTI->getRawDest(), Dest) ||
+ !ComputeAddress(MTI->getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
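The test changes below remove the multi-use and cross-block cases that the reverted patch had added. With the revert, an address computation shared by several memory instructions is again folded into each of them. A sketch of that pattern (hypothetical function name, mirroring the deleted load_breg_immoff_9 test):

define void @multi_use_sketch(i64 %a) {
  ; After the revert, FastISel folds the shared add into both the load
  ; and the store, using [x0, #96] addressing for each.
  %1 = add i64 %a, 96
  %2 = inttoptr i64 %1 to i64*
  %3 = load i64* %2
  %4 = add i64 %3, 1
  store i64 %4, i64* %2
  ret void
}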
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=216632&r1=216631&r2=216632&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 27 18:09:40 2014
@@ -281,50 +281,6 @@ define i64 @load_breg_immoff_8(i64 %a) {
ret i64 %3
}
-; Allow folding of the address if it is used by memory instructions only.
-define void @load_breg_immoff_9(i64 %a) {
-; FAST-LABEL: load_breg_immoff_9
-; FAST: ldr {{x[0-9]+}}, [x0, #96]
-; FAST: str {{x[0-9]+}}, [x0, #96]
- %1 = add i64 %a, 96
- %2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
- %4 = add i64 %3, 1
- store i64 %4, i64* %2
- ret void
-}
-
-; Don't fold if the add result leaves the basic block - even if the user is a
-; memory operation.
-define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
-; FAST-LABEL: load_breg_immoff_10
-; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
- %1 = add i64 %a, 96
- %2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
- br i1 %c, label %bb1, label %bb2
-bb1:
- %4 = shl i64 %1, 3
- %5 = inttoptr i64 %4 to i64*
- %res = load i64* %5
- ret i64 %res
-bb2:
- ret i64 %3
-}
-
-; Don't allow folding of the address if it is used by non-memory instructions.
-define i64 @load_breg_immoff_11(i64 %a) {
-; FAST-LABEL: load_breg_immoff_11
-; FAST: add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
- %1 = add i64 %a, 96
- %2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
- %4 = add i64 %1, %3
- ret i64 %4
-}
-
; Load Base Register + Register Offset
define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_1
@@ -345,33 +301,6 @@ define i64 @load_breg_offreg_2(i64 %a, i
ret i64 %3
}
-; Don't fold if the add result leaves the basic block.
-define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_3
-; FAST: add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
- %1 = add i64 %a, %b
- %2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
- br i1 %c, label %bb1, label %bb2
-bb1:
- %res = load i64* %2
- ret i64 %res
-bb2:
- ret i64 %3
-}
-
-define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_4
-; FAST: add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
- %1 = add i64 %a, %b
- %2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
- %4 = add i64 %1, %3
- ret i64 %4
-}
-
; Load Base Register + Register Offset + Immediate Offset
define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_immoff_1
@@ -476,35 +405,6 @@ define i32 @load_breg_shift_offreg_5(i64
ret i32 %5
}
-; Don't fold if the shift result leaves the basic block.
-define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_shift_offreg_6
-; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
- %1 = shl i64 %a, 3
- %2 = add i64 %b, %1
- %3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
- br i1 %c, label %bb1, label %bb2
-bb1:
- %5 = inttoptr i64 %1 to i64*
- %res = load i64* %5
- ret i64 %res
-bb2:
- ret i64 %4
-}
-
-define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
-; FAST-LABEL: load_breg_shift_offreg_7
-; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT: ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
- %1 = shl i64 %a, 3
- %2 = add i64 %b, %1
- %3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
- %5 = add i64 %1, %4
- ret i64 %5
-}
; Load Base Register + Scaled Register Offset + Sign/Zero extension
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@@ -529,36 +429,6 @@ define i32 @load_breg_zext_shift_offreg_
ret i32 %5
}
-; Don't fold if the zext result leaves the basic block.
-define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_zext_shift_offreg_3
-; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
- %1 = zext i32 %a to i64
- %2 = shl i64 %1, 3
- %3 = add i64 %b, %2
- %4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
- br i1 %c, label %bb1, label %bb2
-bb1:
- %6 = inttoptr i64 %1 to i64*
- %res = load i64* %6
- ret i64 %res
-bb2:
- ret i64 %5
-}
-
-define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
-; FAST-LABEL: load_breg_zext_shift_offreg_4
-; FAST: ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
- %1 = zext i32 %a to i64
- %2 = shl i64 %1, 3
- %3 = add i64 %b, %2
- %4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
- %6 = add i64 %1, %5
- ret i64 %6
-}
-
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]