[llvm-commits] [llvm] r171524 - in /llvm/trunk: lib/Target/X86/CMakeLists.txt lib/Target/X86/X86.h lib/Target/X86/X86.td lib/Target/X86/X86PadShortFunction.cpp lib/Target/X86/X86Subtarget.cpp lib/Target/X86/X86Subtarget.h lib/Target/X86/X86TargetMachine.cpp test/CodeGen/X86/atom-pad-short-functions.ll test/CodeGen/X86/fast-isel-x86-64.ll test/CodeGen/X86/ret-mmx.ll test/CodeGen/X86/select.ll

Nadav Rotem nrotem at apple.com
Fri Jan 4 13:21:27 PST 2013


Also, a few other comments: please use DenseMap and not std::map. Please also check for -Os, and do not increase the code size if the 'optforsize' attribute is set.

On Jan 4, 2013, at 12:54 PM, Preston Gurd <preston.gurd at intel.com> wrote:

> Author: pgurd
> Date: Fri Jan  4 14:54:54 2013
> New Revision: 171524
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=171524&view=rev
> Log:
> The current Intel Atom microarchitecture has a feature whereby, when a function
> returns early, it is slightly faster to execute a sequence of NOP
> instructions to wait until the return address is ready,
> as opposed to simply stalling on the ret instruction
> until the return address is ready.
> 
> When compiling for X86 Atom only, this patch will run a pass called
> "X86PadShortFunction", which will add NOP instructions where fewer than four
> cycles elapse between function entry and return.
> 
> It includes tests.
> 
> Patch by Andy Zhang.
> 
> 
> Added:
>    llvm/trunk/lib/Target/X86/X86PadShortFunction.cpp
>    llvm/trunk/test/CodeGen/X86/atom-pad-short-functions.ll
> Modified:
>    llvm/trunk/lib/Target/X86/CMakeLists.txt
>    llvm/trunk/lib/Target/X86/X86.h
>    llvm/trunk/lib/Target/X86/X86.td
>    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
>    llvm/trunk/lib/Target/X86/X86Subtarget.h
>    llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
>    llvm/trunk/test/CodeGen/X86/fast-isel-x86-64.ll
>    llvm/trunk/test/CodeGen/X86/ret-mmx.ll
>    llvm/trunk/test/CodeGen/X86/select.ll
> 
> Modified: llvm/trunk/lib/Target/X86/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/CMakeLists.txt?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Target/X86/CMakeLists.txt Fri Jan  4 14:54:54 2013
> @@ -25,6 +25,7 @@
>   X86JITInfo.cpp
>   X86MCInstLower.cpp
>   X86MachineFunctionInfo.cpp
> +  X86PadShortFunction.cpp
>   X86RegisterInfo.cpp
>   X86SelectionDAGInfo.cpp
>   X86Subtarget.cpp
> 
> Modified: llvm/trunk/lib/Target/X86/X86.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.h?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86.h (original)
> +++ llvm/trunk/lib/Target/X86/X86.h Fri Jan  4 14:54:54 2013
> @@ -63,6 +63,11 @@
> ///
> FunctionPass *createEmitX86CodeToMemory();
> 
> +/// createX86PadShortFunctions - Return a pass that pads short functions
> +/// with NOOPs. This will prevent a stall when returning from the function
> +/// on the Atom.
> +FunctionPass *createX86PadShortFunctions();
> +
> } // End llvm namespace
> 
> #endif
> 
> Modified: llvm/trunk/lib/Target/X86/X86.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86.td (original)
> +++ llvm/trunk/lib/Target/X86/X86.td Fri Jan  4 14:54:54 2013
> @@ -123,8 +123,11 @@
> def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
>                                      "Use LEA for adjusting the stack pointer">;
> def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
> -                          "HasSlowDivide", "true",
> -                          "Use small divide for positive values less than 256">;
> +                                     "HasSlowDivide", "true",
> +                                     "Use small divide for positive values less than 256">;
> +def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
> +                                     "PadShortFunctions", "true",
> +                                     "Pad short functions">;
> 
> //===----------------------------------------------------------------------===//
> // X86 processors supported.
> @@ -167,7 +170,7 @@
>                                FeatureSlowBTMem]>;
> def : AtomProc<"atom",        [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
>                                FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
> -                               FeatureSlowDivide]>;
> +                               FeatureSlowDivide, FeaturePadShortFunctions]>;
> // "Arrandale" along with corei3 and corei5
> def : Proc<"corei7",          [FeatureSSE42, FeatureCMPXCHG16B,
>                                FeatureSlowBTMem, FeatureFastUAMem,
> 
> Added: llvm/trunk/lib/Target/X86/X86PadShortFunction.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86PadShortFunction.cpp?rev=171524&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86PadShortFunction.cpp (added)
> +++ llvm/trunk/lib/Target/X86/X86PadShortFunction.cpp Fri Jan  4 14:54:54 2013
> @@ -0,0 +1,184 @@
> +//===-------- X86PadShortFunction.cpp - pad short functions -----------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines the pass which will pad short functions to prevent
> +// a stall if a function returns before the return address is ready. This
> +// is needed for some Intel Atom processors.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include <map>
> +#include <algorithm>
> +
> +#define DEBUG_TYPE "x86-pad-short-functions"
> +#include "X86.h"
> +#include "X86InstrInfo.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/Passes.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +using namespace llvm;
> +
> +STATISTIC(NumBBsPadded, "Number of basic blocks padded");
> +
> +namespace {
> +  struct PadShortFunc : public MachineFunctionPass {
> +    static char ID;
> +    PadShortFunc() : MachineFunctionPass(ID)
> +                   , Threshold(4)
> +    {}
> +
> +    virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> +    virtual const char *getPassName() const
> +    {
> +      return "X86 Atom pad short functions";
> +    }
> +
> +  private:
> +    bool addPadding(MachineFunction &MF,
> +                    MachineBasicBlock &MBB,
> +                    MachineBasicBlock::iterator &MBBI,
> +                    unsigned int NOOPsToAdd);
> +
> +    void findReturn(MachineFunction &MF,
> +                    MachineBasicBlock &MBB,
> +                    unsigned int Cycles);
> +
> +    bool cyclesUntilReturn(MachineFunction &MF,
> +                        MachineBasicBlock &MBB,
> +                        unsigned int &Cycles,
> +                        MachineBasicBlock::iterator *Location = 0);
> +
> +    const unsigned int Threshold;
> +    std::map<int, unsigned int> ReturnBBs;
> +  };
> +
> +  char PadShortFunc::ID = 0;
> +}
> +
> +FunctionPass *llvm::createX86PadShortFunctions() {
> +  return new PadShortFunc();
> +}
> +
> +/// runOnMachineFunction - Loop over all of the basic blocks, inserting
> +/// NOOP instructions before early exits.
> +bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
> +  // Process all basic blocks.
> +  ReturnBBs.clear();
> +
> +  // Search through basic blocks and mark the ones that have early returns
> +  findReturn(MF, *MF.begin(), 0);
> +
> +  int BBNum;
> +  MachineBasicBlock::iterator ReturnLoc;
> +  MachineBasicBlock *MBB;
> +
> +  unsigned int Cycles = 0;
> +  unsigned int BBCycles;
> +
> +  // Pad the identified basic blocks with NOOPs
> +  for (std::map<int, unsigned int>::iterator I = ReturnBBs.begin();
> +       I != ReturnBBs.end(); ++I) {
> +    BBNum = I->first;
> +    Cycles = I->second;
> +
> +    if (Cycles < Threshold) {
> +      MBB = MF.getBlockNumbered(BBNum);
> +      if (!cyclesUntilReturn(MF, *MBB, BBCycles, &ReturnLoc))
> +        continue;
> +
> +      addPadding(MF, *MBB, ReturnLoc, Threshold - Cycles);
> +      NumBBsPadded++;
> +    }
> +  }
> +
> +  return false;
> +}
> +
> +/// findReturn - Starting at MBB, follow control flow and add all
> +/// basic blocks that contain a return to ReturnBBs.
> +void PadShortFunc::findReturn(MachineFunction &MF,
> +                              MachineBasicBlock &MBB,
> +                              unsigned int Cycles)
> +{
> +  // If this BB has a return, note how many cycles it takes to get there.
> +  bool hasReturn = cyclesUntilReturn(MF, MBB, Cycles);
> +  if (Cycles >= Threshold)
> +    return;
> +
> +  if (hasReturn) {
> +    int BBNum = MBB.getNumber();
> +    ReturnBBs[BBNum] = std::max(ReturnBBs[BBNum], Cycles);
> +
> +    return;
> +  }
> +
> +  // Follow branches in BB and look for returns
> +  for (MachineBasicBlock::succ_iterator I = MBB.succ_begin();
> +       I != MBB.succ_end(); ++I) {
> +    findReturn(MF, **I, Cycles);
> +  }
> +}
> +
> +/// cyclesUntilReturn - if the MBB has a return instruction, set Location
> +/// to the instruction and return true. Return false otherwise.
> +/// Cycles will be incremented by the number of cycles taken to reach the
> +/// return or the end of the BB, whichever occurs first.
> +bool PadShortFunc::cyclesUntilReturn(MachineFunction &MF,
> +                                  MachineBasicBlock &MBB,
> +                                  unsigned int &Cycles,
> +                                  MachineBasicBlock::iterator *Location)
> +{
> +  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> +  const TargetMachine &Target = MF.getTarget();
> +
> +  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
> +       ++MBBI) {
> +    MachineInstr *MI = MBBI;
> +    // Mark basic blocks with a return instruction. Calls to other functions
> +    // do not count because the called function will be padded, if necessary
> +    if (MI->isReturn() && !MI->isCall()) {
> +      if (Location)
> +        *Location = MBBI;
> +      return true;
> +    }
> +
> +    Cycles += TII.getInstrLatency(Target.getInstrItineraryData(), MI);
> +  }
> +
> +  return false;
> +}
> +
> +/// addPadding - Add the given number of NOOP instructions to the function
> +/// right before the return at MBBI
> +bool PadShortFunc::addPadding(MachineFunction &MF,
> +                              MachineBasicBlock &MBB,
> +                              MachineBasicBlock::iterator &MBBI,
> +                              unsigned int NOOPsToAdd)
> +{
> +  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> +
> +  DebugLoc DL = MBBI->getDebugLoc();
> +
> +  while (NOOPsToAdd-- > 0) {
> +    // Since Atom has two instruction execution ports,
> +    // the code emits two noops, which will be executed in parallel
> +    // during one cycle.
> +    BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP));
> +    BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP));
> +  }
> +
> +  return true;
> +}
> +
> 
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Jan  4 14:54:54 2013
> @@ -350,6 +350,7 @@
>   , UseLeaForSP(false)
>   , HasSlowDivide(false)
>   , PostRAScheduler(false)
> +  , PadShortFunctions(false)
>   , stackAlignment(4)
>   // FIXME: this is a known good value for Yonah. How about others?
>   , MaxInlineSizeThreshold(128)
> 
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Jan  4 14:54:54 2013
> @@ -146,6 +146,10 @@
>   /// PostRAScheduler - True if using post-register-allocation scheduler.
>   bool PostRAScheduler;
> 
> +  /// PadShortFunctions - True if the short functions should be padded to prevent
> +  /// a stall when returning too early.
> +  bool PadShortFunctions;
> +
>   /// stackAlignment - The minimum alignment known to hold of the stack frame on
>   /// entry to the function and which must be maintained by every function.
>   unsigned stackAlignment;
> @@ -231,6 +235,7 @@
>   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
>   bool useLeaForSP() const { return UseLeaForSP; }
>   bool hasSlowDivide() const { return HasSlowDivide; }
> +  bool padShortFunctions() const { return PadShortFunctions; }
> 
>   bool isAtom() const { return X86ProcFamily == IntelAtom; }
> 
> 
> Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Fri Jan  4 14:54:54 2013
> @@ -190,6 +190,10 @@
>     addPass(createX86IssueVZeroUpperPass());
>     ShouldPrint = true;
>   }
> +  if (getX86Subtarget().padShortFunctions()){
> +    addPass(createX86PadShortFunctions());
> +    ShouldPrint = true;
> +  }
> 
>   return ShouldPrint;
> }
> 
> Added: llvm/trunk/test/CodeGen/X86/atom-pad-short-functions.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atom-pad-short-functions.ll?rev=171524&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atom-pad-short-functions.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atom-pad-short-functions.ll Fri Jan  4 14:54:54 2013
> @@ -0,0 +1,71 @@
> +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck %s
> +
> +declare void @external_function(...)
> +
> +define i32 @test_return_val(i32 %a) nounwind {
> +; CHECK: test_return_val
> +; CHECK: movl
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: ret
> +  ret i32 %a
> +}
> +
> +define i32 @test_add(i32 %a, i32 %b) nounwind {
> +; CHECK: test_add
> +; CHECK: addl
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: ret
> +  %result = add i32 %a, %b
> +  ret i32 %result
> +}
> +
> +define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind {
> +; CHECK: @test_multiple_ret
> +; CHECK: je
> +
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: ret
> +
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: ret
> +
> +  br i1 %c, label %bb1, label %bb2
> +
> +bb1:
> +  ret i32 %a
> +
> +bb2:
> +  ret i32 %b
> +}
> +
> +define void @test_call_others(i32 %x) nounwind
> +{
> +; CHECK: test_call_others
> +; CHECK: je
> +  %tobool = icmp eq i32 %x, 0
> +  br i1 %tobool, label %if.end, label %true.case
> +
> +; CHECK: jmp external_function
> +true.case:
> +  tail call void bitcast (void (...)* @external_function to void ()*)() nounwind
> +  br label %if.end
> +
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: nop
> +; CHECK: ret
> +if.end:
> +  ret void
> +
> +}
> 
> Modified: llvm/trunk/test/CodeGen/X86/fast-isel-x86-64.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-x86-64.ll?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fast-isel-x86-64.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fast-isel-x86-64.ll Fri Jan  4 14:54:54 2013
> @@ -1,5 +1,5 @@
> -; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
> -; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
> +; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
> +; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
> 
> target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
> target triple = "x86_64-apple-darwin10.0.0"
> 
> Modified: llvm/trunk/test/CodeGen/X86/ret-mmx.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ret-mmx.ll?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/ret-mmx.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/ret-mmx.ll Fri Jan  4 14:54:54 2013
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mcpu=core2 -mattr=+mmx,+sse2 | FileCheck %s
> ; rdar://6602459
> 
> @g_v1di = external global <1 x i64>
> 
> Modified: llvm/trunk/test/CodeGen/X86/select.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select.ll?rev=171524&r1=171523&r2=171524&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/select.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/select.ll Fri Jan  4 14:54:54 2013
> @@ -282,7 +282,7 @@
> ; ATOM: test13:
> ; ATOM: cmpl
> ; ATOM-NEXT: sbbl
> -; ATOM-NEXT: ret
> +; ATOM: ret
> }
> 
> define i32 @test14(i32 %a, i32 %b) nounwind {
> @@ -299,7 +299,7 @@
> ; ATOM: cmpl
> ; ATOM-NEXT: sbbl
> ; ATOM-NEXT: notl
> -; ATOM-NEXT: ret
> +; ATOM: ret
> }
> 
> ; rdar://10961709
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list