[llvm] r215151 - MachineCombiner Pass for selecting faster instruction sequence on AArch64
Chad Rosier
mcrosier at codeaurora.org
Mon Aug 11 09:02:22 PDT 2014
Hi Gerolf,
I believe this commit is causing an ICE.
See: http://llvm.org/bugs/show_bug.cgi?id=20598
Reduced test case included.
Chad
> Author: ghoflehner
> Date: Thu Aug 7 16:40:58 2014
> New Revision: 215151
>
> URL: http://llvm.org/viewvc/llvm-project?rev=215151&view=rev
> Log:
> MachineCombiner Pass for selecting faster instruction sequence on AArch64
>
> Re-commit of r214832,r21469 with a work-around that
> avoids the previous problem with gcc build compilers
>
> The work-around is to use SmallVector instead of ArrayRef
> of basic blocks in preservesResourceLen()/MachineCombiner.cpp
>
>
> Added:
> llvm/trunk/lib/Target/AArch64/AArch64MachineCombinerPattern.h
> llvm/trunk/test/CodeGen/AArch64/madd-lohi.ll
> Modified:
> llvm/trunk/lib/CodeGen/MachineCombiner.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
>
> Modified: llvm/trunk/lib/CodeGen/MachineCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCombiner.cpp?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineCombiner.cpp Thu Aug 7 16:40:58 2014
> @@ -273,7 +273,9 @@ bool MachineCombiner::preservesResourceL
>
> // Compute current resource length
>
> - ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
> + //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
> + SmallVector <const MachineBasicBlock *, 1> MBBarr;
> + MBBarr.push_back(MBB);
> unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
>
> // Deal with SC rather than Instructions.
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td Thu Aug 7
> 16:40:58 2014
> @@ -1351,14 +1351,15 @@ class BaseMulAccum<bit isSub, bits<3> op
> }
>
> multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
> + // MADD/MSUB generation is decided by MachineCombiner.cpp
> def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
> - [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn,
> GPR32:$Rm)))]>,
> + [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn,
> GPR32:$Rm)))*/]>,
> Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
> let Inst{31} = 0;
> }
>
> def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
> - [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn,
> GPR64:$Rm)))]>,
> + [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn,
> GPR64:$Rm)))*/]>,
> Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
> let Inst{31} = 1;
> }
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Aug 7 16:40:58
> 2014
> @@ -14,6 +14,7 @@
> #include "AArch64InstrInfo.h"
> #include "AArch64Subtarget.h"
> #include "MCTargetDesc/AArch64AddressingModes.h"
> +#include "AArch64MachineCombinerPattern.h"
> #include "llvm/CodeGen/MachineFrameInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineMemOperand.h"
> @@ -697,17 +698,12 @@ static bool UpdateOperandRegClass(Machin
> return true;
> }
>
> -/// optimizeCompareInstr - Convert the instruction supplying the argument
> to the
> -/// comparison into one that sets the zero bit in the flags register.
> -bool AArch64InstrInfo::optimizeCompareInstr(
> - MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int
> CmpMask,
> - int CmpValue, const MachineRegisterInfo *MRI) const {
> -
> - // Replace SUBSWrr with SUBWrr if NZCV is not used.
> - int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV,
> true);
> - if (Cmp_NZCV != -1) {
> +/// convertFlagSettingOpcode - return opcode that does not
> +/// set flags when possible. The caller is responsible to do
> +/// the actual substitution and legality checking.
> +static unsigned convertFlagSettingOpcode(MachineInstr *MI) {
> unsigned NewOpc;
> - switch (CmpInstr->getOpcode()) {
> + switch (MI->getOpcode()) {
> default:
> return false;
> case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
> @@ -727,7 +723,22 @@ bool AArch64InstrInfo::optimizeCompareIn
> case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
> case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
> }
> + return NewOpc;
> +}
>
> +/// optimizeCompareInstr - Convert the instruction supplying the argument
> to the
> +/// comparison into one that sets the zero bit in the flags register.
> +bool AArch64InstrInfo::optimizeCompareInstr(
> + MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int
> CmpMask,
> + int CmpValue, const MachineRegisterInfo *MRI) const {
> +
> + // Replace SUBSWrr with SUBWrr if NZCV is not used.
> + int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV,
> true);
> + if (Cmp_NZCV != -1) {
> + unsigned Opc = CmpInstr->getOpcode();
> + unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
> + if (NewOpc == Opc)
> + return false;
> const MCInstrDesc &MCID = get(NewOpc);
> CmpInstr->setDesc(MCID);
> CmpInstr->RemoveOperand(Cmp_NZCV);
> @@ -2185,3 +2196,448 @@ void AArch64InstrInfo::getNoopForMachoTa
> NopInst.setOpcode(AArch64::HINT);
> NopInst.addOperand(MCOperand::CreateImm(0));
> }
> +/// useMachineCombiner - return true when a target supports
> MachineCombiner
> +bool AArch64InstrInfo::useMachineCombiner(void) const {
> + // AArch64 supports the combiner
> + return true;
> +}
> +//
> +// True when Opc sets flag
> +static bool isCombineInstrSettingFlag(unsigned Opc) {
> + switch (Opc) {
> + case AArch64::ADDSWrr:
> + case AArch64::ADDSWri:
> + case AArch64::ADDSXrr:
> + case AArch64::ADDSXri:
> + case AArch64::SUBSWrr:
> + case AArch64::SUBSXrr:
> + // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
> + case AArch64::SUBSWri:
> + case AArch64::SUBSXri:
> + return true;
> + default:
> + break;
> + }
> + return false;
> +}
> +//
> +// 32b Opcodes that can be combined with a MUL
> +static bool isCombineInstrCandidate32(unsigned Opc) {
> + switch (Opc) {
> + case AArch64::ADDWrr:
> + case AArch64::ADDWri:
> + case AArch64::SUBWrr:
> + case AArch64::ADDSWrr:
> + case AArch64::ADDSWri:
> + case AArch64::SUBSWrr:
> + // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
> + case AArch64::SUBWri:
> + case AArch64::SUBSWri:
> + return true;
> + default:
> + break;
> + }
> + return false;
> +}
> +//
> +// 64b Opcodes that can be combined with a MUL
> +static bool isCombineInstrCandidate64(unsigned Opc) {
> + switch (Opc) {
> + case AArch64::ADDXrr:
> + case AArch64::ADDXri:
> + case AArch64::SUBXrr:
> + case AArch64::ADDSXrr:
> + case AArch64::ADDSXri:
> + case AArch64::SUBSXrr:
> + // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
> + case AArch64::SUBXri:
> + case AArch64::SUBSXri:
> + return true;
> + default:
> + break;
> + }
> + return false;
> +}
> +//
> +// Opcodes that can be combined with a MUL
> +static bool isCombineInstrCandidate(unsigned Opc) {
> + return (isCombineInstrCandidate32(Opc) ||
> isCombineInstrCandidate64(Opc));
> +}
> +
> +static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
> + unsigned MulOpc, unsigned ZeroReg) {
> + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
> + MachineInstr *MI = nullptr;
> + // We need a virtual register definition.
> + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
> + MI = MRI.getUniqueVRegDef(MO.getReg());
> + // And it needs to be in the trace (otherwise, it won't have a depth).
> + if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() !=
> MulOpc)
> + return false;
> +
> + assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
> + MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
> + MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4
> regs");
> +
> + // The third input reg must be zero.
> + if (MI->getOperand(3).getReg() != ZeroReg)
> + return false;
> +
> + // Must only be used by the user we combine with.
> + if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
> + return false;
> +
> + return true;
> +}
> +
> +/// hasPattern - return true when there is potentially a faster code
> sequence
> +/// for an instruction chain ending in \p Root. All potential patterns
> are
> +/// listed
> +/// in the \p Pattern vector. Pattern should be sorted in priority order
> since
> +/// the pattern evaluator stops checking as soon as it finds a faster
> sequence.
> +
> +bool AArch64InstrInfo::hasPattern(
> + MachineInstr &Root,
> + SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
> + unsigned Opc = Root.getOpcode();
> + MachineBasicBlock &MBB = *Root.getParent();
> + bool Found = false;
> +
> + if (!isCombineInstrCandidate(Opc))
> + return 0;
> + if (isCombineInstrSettingFlag(Opc)) {
> + int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
> + // When NZCV is live bail out.
> + if (Cmp_NZCV == -1)
> + return 0;
> + unsigned NewOpc = convertFlagSettingOpcode(&Root);
> + // When opcode can't change bail out.
> + // CHECKME: do we miss any cases for opcode conversion?
> + if (NewOpc == Opc)
> + return 0;
> + Opc = NewOpc;
> + }
> +
> + switch (Opc) {
> + default:
> + break;
> + case AArch64::ADDWrr:
> + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
> + "ADDWrr does not have register operands");
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
> + Found = true;
> + }
> + if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
> + Found = true;
> + }
> + break;
> + case AArch64::ADDXrr:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
> + Found = true;
> + }
> + if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
> + Found = true;
> + }
> + break;
> + case AArch64::SUBWrr:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
> + Found = true;
> + }
> + if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
> + Found = true;
> + }
> + break;
> + case AArch64::SUBXrr:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
> + Found = true;
> + }
> + if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
> + Found = true;
> + }
> + break;
> + case AArch64::ADDWri:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
> + Found = true;
> + }
> + break;
> + case AArch64::ADDXri:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
> + Found = true;
> + }
> + break;
> + case AArch64::SUBWri:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
> + AArch64::WZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
> + Found = true;
> + }
> + break;
> + case AArch64::SUBXri:
> + if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
> + AArch64::XZR)) {
> + Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
> + Found = true;
> + }
> + break;
> + }
> + return Found;
> +}
> +
> +/// genMadd - Generate madd instruction and combine mul and add.
> +/// Example:
> +/// MUL I=A,B,0
> +/// ADD R,I,C
> +/// ==> MADD R,A,B,C
> +/// \param Root is the ADD instruction
> +/// \param [out] InsInstrs is a vector of machine instructions and will
> +/// contain the generated madd instruction
> +/// \param IdxMulOpd is index of operand in Root that is the result of
> +/// the MUL. In the example above IdxMulOpd is 1.
> +/// \param MaddOpc the opcode of the madd instruction
> +static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo
> &MRI,
> + const TargetInstrInfo *TII, MachineInstr
> &Root,
> + SmallVectorImpl<MachineInstr *> &InsInstrs,
> + unsigned IdxMulOpd, unsigned MaddOpc) {
> + assert(IdxMulOpd == 1 || IdxMulOpd == 2);
> +
> + unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
> + MachineInstr *MUL =
> MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
> + MachineOperand R = Root.getOperand(0);
> + MachineOperand A = MUL->getOperand(1);
> + MachineOperand B = MUL->getOperand(2);
> + MachineOperand C = Root.getOperand(IdxOtherOpd);
> + MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(),
> TII->get(MaddOpc))
> + .addOperand(R)
> + .addOperand(A)
> + .addOperand(B)
> + .addOperand(C);
> + // Insert the MADD
> + InsInstrs.push_back(MIB);
> + return MUL;
> +}
> +
> +/// genMaddR - Generate madd instruction and combine mul and add using
> +/// an extra virtual register
> +/// Example - an ADD intermediate needs to be stored in a register:
> +/// MUL I=A,B,0
> +/// ADD R,I,Imm
> +/// ==> ORR V, ZR, Imm
> +/// ==> MADD R,A,B,V
> +/// \param Root is the ADD instruction
> +/// \param [out] InsInstrs is a vector of machine instructions and will
> +/// contain the generated madd instruction
> +/// \param IdxMulOpd is index of operand in Root that is the result of
> +/// the MUL. In the example above IdxMulOpd is 1.
> +/// \param MaddOpc the opcode of the madd instruction
> +/// \param VR is a virtual register that holds the value of an ADD
> operand
> +/// (V in the example above).
> +static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo
> &MRI,
> + const TargetInstrInfo *TII, MachineInstr
> &Root,
> + SmallVectorImpl<MachineInstr *> &InsInstrs,
> + unsigned IdxMulOpd, unsigned MaddOpc,
> + unsigned VR) {
> + assert(IdxMulOpd == 1 || IdxMulOpd == 2);
> +
> + MachineInstr *MUL =
> MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
> + MachineOperand R = Root.getOperand(0);
> + MachineOperand A = MUL->getOperand(1);
> + MachineOperand B = MUL->getOperand(2);
> + MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(),
> TII->get(MaddOpc))
> + .addOperand(R)
> + .addOperand(A)
> + .addOperand(B)
> + .addReg(VR);
> + // Insert the MADD
> + InsInstrs.push_back(MIB);
> + return MUL;
> +}
> +/// genAlternativeCodeSequence - when hasPattern() finds a pattern
> +/// this function generates the instructions that could replace the
> +/// original code sequence
> +void AArch64InstrInfo::genAlternativeCodeSequence(
> + MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
> + SmallVectorImpl<MachineInstr *> &InsInstrs,
> + SmallVectorImpl<MachineInstr *> &DelInstrs,
> + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
> + MachineBasicBlock &MBB = *Root.getParent();
> + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
> + MachineFunction &MF = *MBB.getParent();
> + const TargetInstrInfo *TII =
> MF.getTarget().getSubtargetImpl()->getInstrInfo();
> +
> + MachineInstr *MUL;
> + unsigned Opc;
> + switch (Pattern) {
> + default:
> + // signal error.
> + break;
> + case MachineCombinerPattern::MC_MULADDW_OP1:
> + case MachineCombinerPattern::MC_MULADDX_OP1:
> + // MUL I=A,B,0
> + // ADD R,I,C
> + // ==> MADD R,A,B,C
> + // --- Create(MADD);
> + Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP1 ?
> AArch64::MADDWrrr
> + :
> AArch64::MADDXrrr;
> + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc);
> + break;
> + case MachineCombinerPattern::MC_MULADDW_OP2:
> + case MachineCombinerPattern::MC_MULADDX_OP2:
> + // MUL I=A,B,0
> + // ADD R,C,I
> + // ==> MADD R,A,B,C
> + // --- Create(MADD);
> + Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP2 ?
> AArch64::MADDWrrr
> + :
> AArch64::MADDXrrr;
> + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
> + break;
> + case MachineCombinerPattern::MC_MULADDWI_OP1:
> + case MachineCombinerPattern::MC_MULADDXI_OP1:
> + // MUL I=A,B,0
> + // ADD R,I,Imm
> + // ==> ORR V, ZR, Imm
> + // ==> MADD R,A,B,V
> + // --- Create(MADD);
> + {
> + const TargetRegisterClass *RC =
> + MRI.getRegClass(Root.getOperand(1).getReg());
> + unsigned NewVR = MRI.createVirtualRegister(RC);
> + unsigned BitSize, OrrOpc, ZeroReg;
> + if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
> + BitSize = 32;
> + OrrOpc = AArch64::ORRWri;
> + ZeroReg = AArch64::WZR;
> + Opc = AArch64::MADDWrrr;
> + } else {
> + OrrOpc = AArch64::ORRXri;
> + BitSize = 64;
> + ZeroReg = AArch64::XZR;
> + Opc = AArch64::MADDXrrr;
> + }
> + uint64_t Imm = Root.getOperand(2).getImm();
> +
> + if (Root.getOperand(3).isImm()) {
> + unsigned val = Root.getOperand(3).getImm();
> + Imm = Imm << val;
> + }
> + uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
> + uint64_t Encoding;
> +
> + if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
> + MachineInstrBuilder MIB1 =
> + BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
> + .addOperand(MachineOperand::CreateReg(NewVR,
> RegState::Define))
> + .addReg(ZeroReg)
> + .addImm(Encoding);
> + InsInstrs.push_back(MIB1);
> + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
> + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
> + }
> + }
> + break;
> + case MachineCombinerPattern::MC_MULSUBW_OP1:
> + case MachineCombinerPattern::MC_MULSUBX_OP1: {
> + // MUL I=A,B,0
> + // SUB R,I, C
> + // ==> SUB V, 0, C
> + // ==> MADD R,A,B,V // = -C + A*B
> + // --- Create(MADD);
> + const TargetRegisterClass *RC =
> + MRI.getRegClass(Root.getOperand(1).getReg());
> + unsigned NewVR = MRI.createVirtualRegister(RC);
> + unsigned SubOpc, ZeroReg;
> + if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
> + SubOpc = AArch64::SUBWrr;
> + ZeroReg = AArch64::WZR;
> + Opc = AArch64::MADDWrrr;
> + } else {
> + SubOpc = AArch64::SUBXrr;
> + ZeroReg = AArch64::XZR;
> + Opc = AArch64::MADDXrrr;
> + }
> + // SUB NewVR, 0, C
> + MachineInstrBuilder MIB1 =
> + BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc))
> + .addOperand(MachineOperand::CreateReg(NewVR,
> RegState::Define))
> + .addReg(ZeroReg)
> + .addOperand(Root.getOperand(2));
> + InsInstrs.push_back(MIB1);
> + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
> + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
> + } break;
> + case MachineCombinerPattern::MC_MULSUBW_OP2:
> + case MachineCombinerPattern::MC_MULSUBX_OP2:
> + // MUL I=A,B,0
> + // SUB R,C,I
> + // ==> MSUB R,A,B,C (computes C - A*B)
> + // --- Create(MSUB);
> + Opc = Pattern == MachineCombinerPattern::MC_MULSUBW_OP2 ?
> AArch64::MSUBWrrr
> + :
> AArch64::MSUBXrrr;
> + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
> + break;
> + case MachineCombinerPattern::MC_MULSUBWI_OP1:
> + case MachineCombinerPattern::MC_MULSUBXI_OP1: {
> + // MUL I=A,B,0
> + // SUB R,I, Imm
> + // ==> ORR V, ZR, -Imm
> + // ==> MADD R,A,B,V // = -Imm + A*B
> + // --- Create(MADD);
> + const TargetRegisterClass *RC =
> + MRI.getRegClass(Root.getOperand(1).getReg());
> + unsigned NewVR = MRI.createVirtualRegister(RC);
> + unsigned BitSize, OrrOpc, ZeroReg;
> + if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
> + BitSize = 32;
> + OrrOpc = AArch64::ORRWri;
> + ZeroReg = AArch64::WZR;
> + Opc = AArch64::MADDWrrr;
> + } else {
> + OrrOpc = AArch64::ORRXri;
> + BitSize = 64;
> + ZeroReg = AArch64::XZR;
> + Opc = AArch64::MADDXrrr;
> + }
> + int Imm = Root.getOperand(2).getImm();
> + if (Root.getOperand(3).isImm()) {
> + unsigned val = Root.getOperand(3).getImm();
> + Imm = Imm << val;
> + }
> + uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
> + uint64_t Encoding;
> + if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
> + MachineInstrBuilder MIB1 =
> + BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
> + .addOperand(MachineOperand::CreateReg(NewVR,
> RegState::Define))
> + .addReg(ZeroReg)
> + .addImm(Encoding);
> + InsInstrs.push_back(MIB1);
> + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
> + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
> + }
> + } break;
> + }
> + // Record MUL and ADD/SUB for deletion
> + DelInstrs.push_back(MUL);
> + DelInstrs.push_back(&Root);
> +
> + return;
> +}
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Thu Aug 7 16:40:58
> 2014
> @@ -17,6 +17,7 @@
> #include "AArch64.h"
> #include "AArch64RegisterInfo.h"
> #include "llvm/Target/TargetInstrInfo.h"
> +#include "llvm/CodeGen/MachineCombinerPattern.h"
>
> #define GET_INSTRINFO_HEADER
> #include "AArch64GenInstrInfo.inc"
> @@ -156,9 +157,26 @@ public:
> bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
> unsigned SrcReg2, int CmpMask, int CmpValue,
> const MachineRegisterInfo *MRI) const
> override;
> + /// hasPattern - return true when there is potentially a faster code
> sequence
> + /// for an instruction chain ending in <Root>. All potential patterns
> are
> + /// listed
> + /// in the <Pattern> array.
> + virtual bool hasPattern(
> + MachineInstr &Root,
> + SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern)
> const;
> +
> + /// genAlternativeCodeSequence - when hasPattern() finds a pattern
> + /// this function generates the instructions that could replace the
> + /// original code sequence
> + virtual void genAlternativeCodeSequence(
> + MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
> + SmallVectorImpl<MachineInstr *> &InsInstrs,
> + SmallVectorImpl<MachineInstr *> &DelInstrs,
> + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
> + /// useMachineCombiner - AArch64 supports MachineCombiner
> + virtual bool useMachineCombiner(void) const;
>
> bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
> -
> private:
> void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
> MachineBasicBlock *TBB,
>
> Added: llvm/trunk/lib/Target/AArch64/AArch64MachineCombinerPattern.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64MachineCombinerPattern.h?rev=215151&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64MachineCombinerPattern.h (added)
> +++ llvm/trunk/lib/Target/AArch64/AArch64MachineCombinerPattern.h Thu Aug
> 7 16:40:58 2014
> @@ -0,0 +1,42 @@
> +//===- AArch64MachineCombinerPattern.h
> -===//
> +//===- AArch64 instruction pattern supported by combiner
> -===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines the instruction patterns supported by the combiner
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
> +#define LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
> +
> +namespace llvm {
> +
> +/// Enumeration of the instruction patterns supported by the machine combiner
> +///
> +///
> +namespace MachineCombinerPattern {
> +enum MC_PATTERN : int {
> + MC_NONE = 0,
> + MC_MULADDW_OP1 = 1,
> + MC_MULADDW_OP2 = 2,
> + MC_MULSUBW_OP1 = 3,
> + MC_MULSUBW_OP2 = 4,
> + MC_MULADDWI_OP1 = 5,
> + MC_MULSUBWI_OP1 = 6,
> + MC_MULADDX_OP1 = 7,
> + MC_MULADDX_OP2 = 8,
> + MC_MULSUBX_OP1 = 9,
> + MC_MULSUBX_OP2 = 10,
> + MC_MULADDXI_OP1 = 11,
> + MC_MULSUBXI_OP1 = 12
> +};
> +} // end namespace MachineCombinerPattern
> +} // end namespace llvm
> +
> +#endif
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Aug 7
> 16:40:58 2014
> @@ -24,6 +24,10 @@ static cl::opt<bool>
> EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
> cl::init(true), cl::Hidden);
>
> +static cl::opt<bool> EnableMCR("aarch64-mcr",
> + cl::desc("Enable the machine combiner
> pass"),
> + cl::init(true), cl::Hidden);
> +
> static cl::opt<bool>
> EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for
> AArch64"),
> cl::init(true), cl::Hidden);
> @@ -180,6 +184,8 @@ bool AArch64PassConfig::addInstSelector(
> bool AArch64PassConfig::addILPOpts() {
> if (EnableCCMP)
> addPass(createAArch64ConditionalCompares());
> + if (EnableMCR)
> + addPass(&MachineCombinerID);
> if (EnableEarlyIfConversion)
> addPass(&EarlyIfConverterID);
> if (EnableStPairSuppress)
>
> Added: llvm/trunk/test/CodeGen/AArch64/madd-lohi.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/madd-lohi.ll?rev=215151&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/madd-lohi.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/madd-lohi.ll Thu Aug 7 16:40:58 2014
> @@ -0,0 +1,19 @@
> +; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
> +; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck
> --check-prefix=CHECK-BE %s
> +
> +define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
> +; CHECK-LABEL: test_128bitmul:
> +; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
> +; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
> +; CHECK: madd x1, x1, x2, [[PART1]]
> +; CHECK: mul x0, x0, x2
> +
> +; CHECK-BE-LABEL: test_128bitmul:
> +; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
> +; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
> +; CHECK-BE: madd x0, x0, x3, [[PART1]]
> +; CHECK-BE: mul x1, x1, x3
> +
> + %prod = mul i128 %lhs, %rhs
> + ret i128 %prod
> +}
>
> Modified: llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll?rev=215151&r1=215150&r2=215151&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll Thu Aug 7 16:40:58 2014
> @@ -1,17 +1,16 @@
> -; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
> -; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck
> --check-prefix=CHECK-BE %s
> -
> +; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck
> %s
> +; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - |
> FileCheck --check-prefix=CHECK-BE %s
> define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
> ; CHECK-LABEL: test_128bitmul:
> +; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
> ; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
> -; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
> -; CHECK: madd x1, x1, x2, [[PART1]]
> +; CHECK: mul [[PART2:x[0-9]+]], x1, x2
> ; CHECK: mul x0, x0, x2
>
> ; CHECK-BE-LABEL: test_128bitmul:
> +; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
> ; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
> -; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
> -; CHECK-BE: madd x0, x0, x3, [[PART1]]
> +; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
> ; CHECK-BE: mul x1, x1, x3
>
> %prod = mul i128 %lhs, %rhs
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
More information about the llvm-commits mailing list