[llvm] r249079 - AMDGPU: Make SIInsertWaits about a factor of 4 faster

Rafael Espíndola via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 1 15:38:43 PDT 2015


Looks like this broke some bots:

http://lab.llvm.org:8011/builders/lld-x86_64-darwin13/builds/15204/steps/build_Lld/logs/stdio

On 1 October 2015 at 17:43, Matt Arsenault via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: arsenm
> Date: Thu Oct  1 16:43:15 2015
> New Revision: 249079
>
> URL: http://llvm.org/viewvc/llvm-project?rev=249079&view=rev
> Log:
> AMDGPU: Make SIInsertWaits about a factor of 4 faster
>
> This was the slowest target custom pass and was spending 80%
> of the time in getMinimalPhysRegClass which was called
> for every register operand.
>
> Try to use the statically known register class when possible from
> the instruction's MCOperandInfo. There are a few pseudo instructions
> which are not well behaved with unknown register classes which still
> require the expensive physical register class search.
>
> There are a few other possibilities for making this even faster,
> such as not inspecting implicit operands. For now those are checked
> because it is technically possible to have a scalar load into
> exec or vcc which can be implicitly used.
>
> Modified:
>     llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
>     llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=249079&r1=249078&r2=249079&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Thu Oct  1 16:43:15 2015
> @@ -91,7 +91,8 @@ private:
>    bool isOpRelevant(MachineOperand &Op);
>
>    /// \brief Get register interval an operand affects.
> -  RegInterval getRegInterval(MachineOperand &Op);
> +  RegInterval getRegInterval(const TargetRegisterClass *RC,
> +                             const MachineOperand &Reg) const;
>
>    /// \brief Handle instructions async components
>    void pushInstruction(MachineBasicBlock &MBB,
> @@ -142,8 +143,7 @@ FunctionPass *llvm::createSIInsertWaits(
>  }
>
>  Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
> -
> -  uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
> +  uint64_t TSFlags = MI.getDesc().TSFlags;
>    Counters Result = { { 0, 0, 0 } };
>
>    Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
> @@ -161,10 +161,9 @@ Counters SIInsertWaits::getHwCounts(Mach
>          MachineOperand &Op = MI.getOperand(0);
>          assert(Op.isReg() && "First LGKM operand must be a register!");
>
> -        unsigned Reg = Op.getReg();
> -
>          // XXX - What if this is a write into a super register?
> -        unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
> +        const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
> +        unsigned Size = RC->getSize();
>          Result.Named.LGKM = Size > 4 ? 2 : 1;
>        } else {
>          // s_dcache_inv etc. do not have a a destination register. Assume we
> @@ -185,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(Mach
>  }
>
>  bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
> -
>    // Constants are always irrelevant
> -  if (!Op.isReg())
> +  if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
>      return false;
>
>    // Defines are always relevant
> @@ -236,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(Machine
>    return false;
>  }
>
> -RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
> -
> -  if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
> -    return std::make_pair(0, 0);
> -
> -  unsigned Reg = Op.getReg();
> -  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
> -
> +RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
> +                                          const MachineOperand &Reg) const {
> +  unsigned Size = RC->getSize();
>    assert(Size >= 4);
>
>    RegInterval Result;
> -  Result.first = TRI->getEncodingValue(Reg);
> +  Result.first = TRI->getEncodingValue(Reg.getReg());
>    Result.second = Result.first + Size / 4;
>
>    return Result;
> @@ -305,12 +298,12 @@ void SIInsertWaits::pushInstruction(Mach
>    }
>
>    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
> -
>      MachineOperand &Op = I->getOperand(i);
>      if (!isOpRelevant(Op))
>        continue;
>
> -    RegInterval Interval = getRegInterval(Op);
> +    const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
> +    RegInterval Interval = getRegInterval(RC, Op);
>      for (unsigned j = Interval.first; j < Interval.second; ++j) {
>
>        // Remember which registers we define
> @@ -405,12 +398,18 @@ Counters SIInsertWaits::handleOperands(M
>    if (MI.getOpcode() == AMDGPU::S_SENDMSG)
>      return LastIssued;
>
> -  // For each register affected by this
> -  // instruction increase the result sequence
> +  // For each register affected by this instruction increase the result
> +  // sequence.
> +  //
> +  // TODO: We could probably just look at explicit operands if we removed VCC /
> +  // EXEC from SMRD dest reg classes.
>    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
> -
>      MachineOperand &Op = MI.getOperand(i);
> -    RegInterval Interval = getRegInterval(Op);
> +    if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
> +      continue;
> +
> +    const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
> +    RegInterval Interval = getRegInterval(RC, Op);
>      for (unsigned j = Interval.first; j < Interval.second; ++j) {
>
>        if (Op.isDef()) {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=249079&r1=249078&r2=249079&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Thu Oct  1 16:43:15 2015
> @@ -326,6 +326,8 @@ unsigned SIRegisterInfo::getHWRegIndex(u
>    return getEncodingValue(Reg) & 0xff;
>  }
>
> +// FIXME: This is very slow. It might be worth creating a map from physreg to
> +// register class.
>  const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
>    assert(!TargetRegisterInfo::isVirtualRegister(Reg));
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list