[llvm] r219584 - [X86] Memory folding for commutative instructions.

NAKAMURA Takumi geek4civic at gmail.com
Mon Oct 13 02:47:42 PDT 2014


I have reverted it in r219595, since it broke i686 self-hosting:

  - http://bb.pgr.jp/builders/clang-3stage-i686-linux/builds/797
  - http://bb.pgr.jp/builders/clang-3stage-i686-cygwin/builds/667

2014-10-12 19:52 GMT+09:00 Simon Pilgrim <llvm-dev at redking.me.uk>:
> Author: rksimon
> Date: Sun Oct 12 05:52:55 2014
> New Revision: 219584
>
> URL: http://llvm.org/viewvc/llvm-project?rev=219584&view=rev
> Log:
> [X86] Memory folding for commutative instructions.
>
> This patch improves support for commutative instructions in the x86 memory folding implementation by attempting to fold a commuted version of the instruction if the original folding fails. If that folding fails as well, the instruction is 're-commuted' back to its original operand order before returning.
>
> This mainly helps the stack inliner better fold reloads of 3-operand (or more) instructions (VEX-encoded SSE etc.), but by performing this in the lowest-level foldMemoryOperandImpl implementation it also replaces the X86InstrInfo::optimizeLoadInstr version and is now used by FastISel too.
>
> Differential Revision: http://reviews.llvm.org/D5701
>
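
To make the control flow concrete, here is a minimal standalone sketch of
the fold / commute / retry / re-commute pattern the log describes. It is a
paraphrase, not code from the patch: Instr, tryFold, commute and
foldWithCommute are hypothetical stand-ins for the MachineInstr, fold-table
and commuteInstruction machinery.

    #include <utility>

    struct Instr { unsigned Ops[2]; bool Commutable; };

    // Hypothetical stand-ins for the real fold-table lookup and
    // commuteInstruction, trivially stubbed so the sketch compiles.
    static Instr *tryFold(Instr *, unsigned) { return nullptr; }
    static bool commute(Instr *MI) {
      if (!MI->Commutable)
        return false;
      std::swap(MI->Ops[0], MI->Ops[1]);
      return true;
    }

    // Shape of the patch: try the fold, else commute and retry once
    // (AllowCommute=false on the recursive call bounds the recursion),
    // then commute back if the second attempt fails as well.
    static Instr *foldWithCommute(Instr *MI, unsigned OpIdx, bool AllowCommute) {
      if (Instr *Folded = tryFold(MI, OpIdx))
        return Folded;
      if (!AllowCommute)
        return nullptr;
      unsigned OtherIdx = (OpIdx == 0) ? 1 : 0; // the index OpIdx swaps with
      if (!commute(MI))
        return nullptr;
      if (Instr *Folded = foldWithCommute(MI, OtherIdx, /*AllowCommute=*/false))
        return Folded;
      commute(MI); // both operand orders failed: restore the original order
      return nullptr;
    }

The real implementation additionally handles commuteInstruction returning a
new instruction rather than commuting in place, as the diff below shows.
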
> Added:
>     llvm/trunk/test/CodeGen/X86/avx1-stack-reload-folding.ll
> Modified:
>     llvm/trunk/lib/Target/X86/X86FastISel.cpp
>     llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>     llvm/trunk/lib/Target/X86/X86InstrInfo.h
>
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=219584&r1=219583&r2=219584&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Sun Oct 12 05:52:55 2014
> @@ -3337,7 +3337,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(Ma
>    AM.getFullAddress(AddrOps);
>
>    MachineInstr *Result =
> -    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
> +    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment, /*AllowCommute=*/ true);
>    if (!Result)
>      return false;
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=219584&r1=219583&r2=219584&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Oct 12 05:52:55 2014
> @@ -3926,55 +3926,34 @@ optimizeLoadInstr(MachineInstr *MI, cons
>    if (!DefMI->isSafeToMove(this, nullptr, SawStore))
>      return nullptr;
>
> -  // We try to commute MI if possible.
> -  unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
> -  for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
> -    // Collect information about virtual register operands of MI.
> -    unsigned SrcOperandId = 0;
> -    bool FoundSrcOperand = false;
> -    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
> -      MachineOperand &MO = MI->getOperand(i);
> -      if (!MO.isReg())
> -        continue;
> -      unsigned Reg = MO.getReg();
> -      if (Reg != FoldAsLoadDefReg)
> -        continue;
> -      // Do not fold if we have a subreg use or a def or multiple uses.
> -      if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
> -        return nullptr;
> -
> -      SrcOperandId = i;
> -      FoundSrcOperand = true;
> -    }
> -    if (!FoundSrcOperand) return nullptr;
> -
> -    // Check whether we can fold the def into SrcOperandId.
> -    SmallVector<unsigned, 8> Ops;
> -    Ops.push_back(SrcOperandId);
> -    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
> -    if (FoldMI) {
> -      FoldAsLoadDefReg = 0;
> -      return FoldMI;
> -    }
> -
> -    if (Idx == 1) {
> -      // MI was changed but it didn't help, commute it back!
> -      commuteInstruction(MI, false);
> +  // Collect information about virtual register operands of MI.
> +  unsigned SrcOperandId = 0;
> +  bool FoundSrcOperand = false;
> +  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
> +    MachineOperand &MO = MI->getOperand(i);
> +    if (!MO.isReg())
> +      continue;
> +    unsigned Reg = MO.getReg();
> +    if (Reg != FoldAsLoadDefReg)
> +      continue;
> +    // Do not fold if we have a subreg use or a def or multiple uses.
> +    if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
>        return nullptr;
> -    }
>
> -    // Check whether we can commute MI and enable folding.
> -    if (MI->isCommutable()) {
> -      MachineInstr *NewMI = commuteInstruction(MI, false);
> -      // Unable to commute.
> -      if (!NewMI) return nullptr;
> -      if (NewMI != MI) {
> -        // New instruction. It doesn't need to be kept.
> -        NewMI->eraseFromParent();
> -        return nullptr;
> -      }
> -    }
> +    SrcOperandId = i;
> +    FoundSrcOperand = true;
> +  }
> +  if (!FoundSrcOperand) return nullptr;
> +
> +  // Check whether we can fold the def into SrcOperandId.
> +  SmallVector<unsigned, 8> Ops;
> +  Ops.push_back(SrcOperandId);
> +  MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
> +  if (FoldMI) {
> +    FoldAsLoadDefReg = 0;
> +    return FoldMI;
>    }
> +
>    return nullptr;
>  }
>
> @@ -4134,7 +4113,7 @@ MachineInstr*
>  X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
>                                      MachineInstr *MI, unsigned i,
>                                      const SmallVectorImpl<MachineOperand> &MOs,
> -                                    unsigned Size, unsigned Align) const {
> +                                    unsigned Size, unsigned Align, bool AllowCommute) const {
>    const DenseMap<unsigned,
>                   std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
>    bool isCallRegIndirect = Subtarget.callRegIndirect();
> @@ -4231,6 +4210,46 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
>      }
>    }
>
> +  // If the instruction and target operand are commutable, commute the instruction and try again.
> +  if (AllowCommute) {
> +    unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
> +    if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
> +      if ((CommuteOpIdx1 == OriginalOpIdx) || (CommuteOpIdx2 == OriginalOpIdx)) {
> +        MachineInstr* CommutedMI = commuteInstruction(MI, false);
> +        if (!CommutedMI) {
> +          // Unable to commute.
> +          return nullptr;
> +        }
> +        if (CommutedMI != MI) {
> +          // New instruction. We can't fold from this.
> +          CommutedMI->eraseFromParent();
> +          return nullptr;
> +        }
> +
> +        // Attempt to fold with the commuted version of the instruction.
> +        unsigned CommuteOpIdx = (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
> +        NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx, MOs, Size, Align, /*AllowCommute=*/ false);
> +        if (NewMI)
> +          return NewMI;
> +
> +        // Folding failed again - undo the commute before returning.
> +        MachineInstr* UncommutedMI = commuteInstruction(MI, false);
> +        if (!UncommutedMI) {
> +          // Unable to commute.
> +          return nullptr;
> +        }
> +        if (UncommutedMI != MI) {
> +          // New instruction. It doesn't need to be kept.
> +          UncommutedMI->eraseFromParent();
> +          return nullptr;
> +        }
> +
> +        // Return here to prevent duplicate fuse failure report.
> +        return nullptr;
> +      }
> +    }
> +  }
> +
>    // No fusion
>    if (PrintFailedFusing && !MI->isCopy())
>      dbgs() << "We failed to fuse operand " << i << " in " << *MI;
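
An inline note on the retry above (my reading of the patch, not from the
commit message): the recursive foldMemoryOperandImpl call passes
AllowCommute=false, which is what bounds the search to a single commuted
retry, and the final 'return nullptr' after a successful un-commute prevents
the failed-fusing message just above from being printed a second time for
the same instruction, since the recursive attempt will already have printed
it.
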
> @@ -4440,7 +4459,7 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
>
>    SmallVector<MachineOperand,4> MOs;
>    MOs.push_back(MachineOperand::CreateFI(FrameIndex));
> -  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
> +  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment, /*AllowCommute=*/ true);
>  }
>
>  static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
> @@ -4593,7 +4612,7 @@ MachineInstr* X86InstrInfo::foldMemoryOp
>      break;
>    }
>    }
> -  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
> +  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment, /*AllowCommute=*/ true);
>  }
>
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=219584&r1=219583&r2=219584&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Sun Oct 12 05:52:55 2014
> @@ -404,7 +404,7 @@ public:
>                                        MachineInstr* MI,
>                                        unsigned OpNum,
>                                        const SmallVectorImpl<MachineOperand> &MOs,
> -                                      unsigned Size, unsigned Alignment) const;
> +                                      unsigned Size, unsigned Alignment, bool AllowCommute) const;
>
>    void
>    getUnconditionalBranch(MCInst &Branch,
>
> Added: llvm/trunk/test/CodeGen/X86/avx1-stack-reload-folding.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx1-stack-reload-folding.ll?rev=219584&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx1-stack-reload-folding.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/avx1-stack-reload-folding.ll Sun Oct 12 05:52:55 2014
> @@ -0,0 +1,16 @@
> +; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
> +
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-unknown"
> +
> +; Function Attrs: nounwind readonly uwtable
> +define <32 x double> @_Z14vstack_foldDv32_dS_(<32 x double> %a, <32 x double> %b) #0 {
> +  %1 = fadd <32 x double> %a, %b
> +  %2 = fsub <32 x double> %a, %b
> +  %3 = fmul <32 x double> %1, %2
> +  ret <32 x double> %3
> +
> +  ;CHECK-NOT:  vmovapd {{.*#+}} 32-byte Reload
> +  ;CHECK:       vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
> +  ;CHECK-NOT:  vmovapd {{.*#+}} 32-byte Reload
> +}
>
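
A note on why this test exercises the new path (my inference, not stated in
the commit): each <32 x double> value needs eight 32-byte YMM registers, so
the two arguments plus the fadd/fsub results cannot all be kept in the
sixteen registers available and some values are spilled to the stack; the
CHECK lines then require that the spilled operand of vmulpd comes back as a
32-byte folded reload rather than through a separate vmovapd reload.
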
>