[llvm] r218653 - [FastISel][AArch64] Fold sign-/zero-extends into the load instruction.
Juergen Ributzka
juergen at apple.com
Mon Sep 29 17:49:58 PDT 2014
Author: ributzka
Date: Mon Sep 29 19:49:58 2014
New Revision: 218653
URL: http://llvm.org/viewvc/llvm-project?rev=218653&view=rev
Log:
[FastISel][AArch64] Fold sign-/zero-extends into the load instruction.
The sign-/zero-extension of a loaded value can be performed by the load
instruction for free. If the result of the load has only one use and that use is
a sign-/zero-extend, then we emit the extending load instead. The extend then
degenerates to a plain register copy and is optimized away later on.
Other instructions that consume the sign-/zero-extended value are also made
aware of this fact, so they don't try to fold the extend a second time.
This fixes rdar://problem/18495928.
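
For illustration, a minimal sketch of the transformation (hypothetical input,
mirroring the i32-to-i64 case exercised by the new fast-isel-int-ext.ll test):

  define i64 @example(i32* %p) {
    %v = load i32* %p          ; the load's only use is the sext below
    %e = sext i32 %v to i64
    ret i64 %e
  }

Before this change FastISel emitted a plain load followed by a separate extend,
roughly:

  ldr  w0, [x0]
  sxtw x0, w0

With the extend folded into the load, a single sign-extending load remains:

  ldrsw x0, [x0]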
Added:
llvm/trunk/test/CodeGen/AArch64/fast-isel-int-ext.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=218653&r1=218652&r2=218653&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Mon Sep 29 19:49:58 2014
@@ -177,7 +177,7 @@ private:
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
- bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+ bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true,
MachineMemOperand *MMO = nullptr);
bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO = nullptr);
@@ -255,6 +255,23 @@ public:
#include "AArch64GenCallingConv.inc"
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ bool IsZExt = isa<ZExtInst>(I);
+
+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (LI->hasOneUse())
+ return true;
+
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+ return true;
+
+ return false;
+}
+
/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
@@ -585,72 +602,74 @@ bool AArch64FastISel::computeAddress(con
if (Addr.getOffsetReg())
break;
- if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
- unsigned Val = CI->getZExtValue();
- if (Val < 1 || Val > 3)
- break;
+ const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!CI)
+ break;
- uint64_t NumBytes = 0;
- if (Ty && Ty->isSized()) {
- uint64_t NumBits = DL.getTypeSizeInBits(Ty);
- NumBytes = NumBits / 8;
- if (!isPowerOf2_64(NumBits))
- NumBytes = 0;
- }
+ unsigned Val = CI->getZExtValue();
+ if (Val < 1 || Val > 3)
+ break;
- if (NumBytes != (1ULL << Val))
- break;
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
- Addr.setShift(Val);
- Addr.setExtendType(AArch64_AM::LSL);
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
- const Value *Src = U->getOperand(0);
- if (const auto *I = dyn_cast<Instruction>(Src))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- Src = I;
+ const Value *Src = U->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
- if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
- }
- } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::SXTW);
- Src = SE->getOperand(0);
- }
}
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
- if (const auto *AI = dyn_cast<BinaryOperator>(Src))
- if (AI->getOpcode() == Instruction::And) {
- const Value *LHS = AI->getOperand(0);
- const Value *RHS = AI->getOperand(1);
-
- if (const auto *C = dyn_cast<ConstantInt>(LHS))
- if (C->getValue() == 0xffffffff)
- std::swap(LHS, RHS);
-
- if (const auto *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue() == 0xffffffff) {
- Addr.setExtendType(AArch64_AM::UXTW);
- unsigned Reg = getRegForValue(LHS);
- if (!Reg)
- return false;
- bool RegIsKill = hasTrivialKill(LHS);
- Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
- AArch64::sub_32);
- Addr.setOffsetReg(Reg);
- return true;
- }
- }
+ if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+ if (AI->getOpcode() == Instruction::And) {
+ const Value *LHS = AI->getOperand(0);
+ const Value *RHS = AI->getOperand(1);
- unsigned Reg = getRegForValue(Src);
- if (!Reg)
- return false;
- Addr.setOffsetReg(Reg);
- return true;
- }
- break;
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
}
case Instruction::Mul: {
if (Addr.getOffsetReg())
@@ -692,13 +711,15 @@ bool AArch64FastISel::computeAddress(con
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
Src = I;
+
+ // Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
@@ -1568,7 +1589,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT
}
bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- MachineMemOperand *MMO) {
+ bool WantZExt, MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return false;
@@ -1585,20 +1606,38 @@ bool AArch64FastISel::emitLoad(MVT VT, u
ScaleFactor = 1;
}
- static const unsigned OpcTable[4][6] = {
- { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
- AArch64::LDURSi, AArch64::LDURDi },
- { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
- AArch64::LDRSui, AArch64::LDRDui },
- { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
- AArch64::LDRSroX, AArch64::LDRDroX },
- { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
- AArch64::LDRSroW, AArch64::LDRDroW }
+ static const unsigned GPOpcTable[2][4][4] = {
+ // Sign-extend.
+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURSWi,
+ AArch64::LDURXi },
+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRSWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW,
+ AArch64::LDRXroW },
+ },
+ // Zero-extend.
+ { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW }
+ }
+ };
+
+ static const unsigned FPOpcTable[4][2] = {
+ { AArch64::LDURSi, AArch64::LDURDi },
+ { AArch64::LDRSui, AArch64::LDRDui },
+ { AArch64::LDRSroX, AArch64::LDRDroX },
+ { AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
- bool VTIsi1 = false;
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
@@ -1607,14 +1646,33 @@ bool AArch64FastISel::emitLoad(MVT VT, u
Idx++;
switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type.");
- case MVT::i1: VTIsi1 = true; // Intentional fall-through.
- case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
- case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
- case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
+ default:
+ llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // Intentional fall-through.
+ case MVT::i8:
+ Opc = GPOpcTable[WantZExt][Idx][0];
+ RC = &AArch64::GPR32RegClass;
+ break;
+ case MVT::i16:
+ Opc = GPOpcTable[WantZExt][Idx][1];
+ RC = &AArch64::GPR32RegClass;
+ break;
+ case MVT::i32:
+ Opc = GPOpcTable[WantZExt][Idx][2];
+ RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+ break;
+ case MVT::i64:
+ Opc = GPOpcTable[WantZExt][Idx][3];
+ RC = &AArch64::GPR64RegClass;
+ break;
+ case MVT::f32:
+ Opc = FPOpcTable[Idx][0];
+ RC = &AArch64::FPR32RegClass;
+ break;
+ case MVT::f64:
+ Opc = FPOpcTable[Idx][1];
+ RC = &AArch64::FPR64RegClass;
+ break;
}
// Create the base instruction, then add the operands.
@@ -1623,8 +1681,14 @@ bool AArch64FastISel::emitLoad(MVT VT, u
TII.get(Opc), ResultReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
+ // For 32bit loads we do sign-extending loads to 64bit and then extract the
+ // subreg. In the end this is just a NOOP.
+ if (VT == MVT::i32 && !WantZExt)
+ ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true,
+ AArch64::sub_32);
+
// Loading an i1 requires special handling.
- if (VTIsi1) {
+ if (VT == MVT::i1) {
unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
@@ -1701,8 +1765,12 @@ bool AArch64FastISel::selectLoad(const I
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
+ bool WantZExt = true;
+ if (I->hasOneUse() && isa<SExtInst>(I->use_begin()->getUser()))
+ WantZExt = false;
+
unsigned ResultReg;
- if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
+ if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I)))
return false;
updateValueMap(I, ResultReg);
@@ -3776,46 +3844,60 @@ unsigned AArch64FastISel::emitIntExt(MVT
}
bool AArch64FastISel::selectIntExt(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Src = I->getOperand(0);
- Type *SrcTy = Src->getType();
-
- unsigned SrcReg = getRegForValue(Src);
- if (!SrcReg)
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ MVT RetVT;
+ MVT SrcVT;
+ if (!isTypeSupported(I->getType(), RetVT))
return false;
- EVT SrcEVT = TLI.getValueType(SrcTy, true);
- EVT DestEVT = TLI.getValueType(DestTy, true);
- if (!SrcEVT.isSimple())
- return false;
- if (!DestEVT.isSimple())
+ if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = 0;
+ if (isIntExtFree(I)) {
+ unsigned SrcReg = getRegForValue(I->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
- bool IsZExt = isa<ZExtInst>(I);
- // Check if it is an argument and if it is already zero/sign-extended.
- if (const auto *Arg = dyn_cast<Argument>(Src)) {
- if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
- if (DestVT == MVT::i64) {
- ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned ResultReg = createResultReg(RC);
+ if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
.addImm(0)
- .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
.addImm(AArch64::sub_32);
- } else
- ResultReg = SrcReg;
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned SrcReg = getRegForValue(I->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = 0;
+ if (isIntExtFree(I)) {
+ if (RetVT == MVT::i64) {
+ ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(SrcRegIsKill))
+ .addImm(AArch64::sub_32);
+ } else
+ ResultReg = SrcReg;
}
if (!ResultReg)
- ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
+ ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
if (!ResultReg)
return false;
@@ -3891,18 +3973,22 @@ bool AArch64FastISel::selectMul(const In
MVT SrcVT = VT;
bool IsZExt = true;
if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
- MVT VT;
- if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
- SrcVT = VT;
- IsZExt = true;
- Src0 = ZExt->getOperand(0);
+ if (!isIntExtFree(ZExt)) {
+ MVT VT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = true;
+ Src0 = ZExt->getOperand(0);
+ }
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
- MVT VT;
- if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
- SrcVT = VT;
- IsZExt = false;
- Src0 = SExt->getOperand(0);
+ if (!isIntExtFree(SExt)) {
+ MVT VT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = false;
+ Src0 = SExt->getOperand(0);
+ }
}
}
@@ -3954,18 +4040,22 @@ bool AArch64FastISel::selectShift(const
bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
- MVT TmpVT;
- if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
- SrcVT = TmpVT;
- IsZExt = true;
- Op0 = ZExt->getOperand(0);
+ if (!isIntExtFree(ZExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Op0 = ZExt->getOperand(0);
+ }
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
- MVT TmpVT;
- if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
- SrcVT = TmpVT;
- IsZExt = false;
- Op0 = SExt->getOperand(0);
+ if (!isIntExtFree(SExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Op0 = SExt->getOperand(0);
+ }
}
}
@@ -4213,13 +4303,8 @@ bool AArch64FastISel::fastSelectInstruct
case Instruction::FPToUI:
return selectFPToInt(I, /*Signed=*/false);
case Instruction::ZExt:
- if (!selectCast(I, ISD::ZERO_EXTEND))
- return selectIntExt(I);
- return true;
case Instruction::SExt:
- if (!selectCast(I, ISD::SIGN_EXTEND))
- return selectIntExt(I);
- return true;
+ return selectIntExt(I);
case Instruction::Trunc:
if (!selectCast(I, ISD::TRUNCATE))
return selectTrunc(I);
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll?rev=218653&r1=218652&r2=218653&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll Mon Sep 29 19:49:58 2014
@@ -17,7 +17,6 @@ entry:
; CHECK: ldrh w0, [sp, #12]
; CHECK: strb w0, [sp, #15]
; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
; CHECK: add sp, sp, #16
; CHECK: ret
%a.addr = alloca i8, align 1
@@ -51,14 +50,11 @@ entry:
; CHECK: str w2, [sp, #8]
; CHECK: str x3, [sp]
; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
; CHECK: strh w0, [sp, #12]
; CHECK: ldrh w0, [sp, #12]
-; CHECK: uxth w0, w0
; CHECK: str w0, [sp, #8]
; CHECK: ldr w0, [sp, #8]
; CHECK: mov x3, x0
-; CHECK: ubfx x3, x3, #0, #32
; CHECK: str x3, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
@@ -109,15 +105,11 @@ entry:
; CHECK: strh w1, [sp, #12]
; CHECK: str w2, [sp, #8]
; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: sxtb w0, w0
+; CHECK: ldrsb w0, [sp, #15]
; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: sxth w0, w0
+; CHECK: ldrsh w0, [sp, #12]
; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: mov x3, x0
-; CHECK: sxtw x3, w3
+; CHECK: ldrsw x3, [sp, #8]
; CHECK: str x3, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
Added: llvm/trunk/test/CodeGen/AArch64/fast-isel-int-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-int-ext.ll?rev=218653&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-int-ext.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-int-ext.ll Mon Sep 29 19:49:58 2014
@@ -0,0 +1,190 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test that we only use the sign/zero extend in the address calculation when
+; necessary.
+;
+; SHIFT
+;
+define i64 @load_addr_shift_zext1(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_shift_zext2(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3{{\]}}
+ %1 = zext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_shift_sext1(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+ %1 = sext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+ %1 = sext i32 %a to i64
+ %2 = shl i64 %1, 3
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+;
+; MUL
+;
+define i64 @load_addr_mul_zext1(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+ %1 = zext i32 %a to i64
+ %2 = mul i64 %1, 8
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_mul_zext2(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+ %1 = zext i32 %a to i64
+ %2 = mul i64 %1, 8
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_mul_sext1(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+ %1 = sext i32 %a to i64
+ %2 = mul i64 %1, 8
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+ %1 = sext i32 %a to i64
+ %2 = mul i64 %1, 8
+ %3 = add i64 %b, %2
+ %4 = inttoptr i64 %3 to i64*
+ %5 = load i64* %4
+ ret i64 %5
+}
+
+; Test folding of the sign-/zero-extend into the load instruction.
+define i32 @load_zext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i32
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+ %1 = load i8* %a
+ %2 = zext i8 %1 to i32
+ ret i32 %2
+}
+
+define i32 @load_zext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i32
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+ %1 = load i16* %a
+ %2 = zext i16 %1 to i32
+ ret i32 %2
+}
+
+define i64 @load_zext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i64
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+ %1 = load i8* %a
+ %2 = zext i8 %1 to i64
+ ret i64 %2
+}
+
+define i64 @load_zext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i64
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+ %1 = load i16* %a
+ %2 = zext i16 %1 to i64
+ ret i64 %2
+}
+
+define i64 @load_zext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_zext_i32_to_i64
+; CHECK: ldr w0, [x0]
+; CHECK-NOT: uxtw
+ %1 = load i32* %a
+ %2 = zext i32 %1 to i64
+ ret i64 %2
+}
+
+define i32 @load_sext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i32
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+ %1 = load i8* %a
+ %2 = sext i8 %1 to i32
+ ret i32 %2
+}
+
+define i32 @load_sext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i32
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+ %1 = load i16* %a
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+
+define i64 @load_sext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i64
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+ %1 = load i8* %a
+ %2 = sext i8 %1 to i64
+ ret i64 %2
+}
+
+define i64 @load_sext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i64
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+ %1 = load i16* %a
+ %2 = sext i16 %1 to i64
+ ret i64 %2
+}
+
+define i64 @load_sext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_sext_i32_to_i64
+; CHECK: ldrsw x0, [x0]
+; CHECK-NOT: sxtw
+ %1 = load i32* %a
+ %2 = sext i32 %1 to i64
+ ret i64 %2
+}
+