[llvm] r178171 - For the current Atom processor, the fastest way to handle a call
Preston Gurd
preston.gurd at intel.com
Wed Mar 27 12:14:02 PDT 2013
Author: pgurd
Date: Wed Mar 27 14:14:02 2013
New Revision: 178171
URL: http://llvm.org/viewvc/llvm-project?rev=178171&view=rev
Log:
For the current Atom processor, the fastest way to handle a call
indirect through a memory address is to load the memory address into
a register and then call indirect through the register.
This patch implements this improvement by modifying SelectionDAG to
force a function address which is a memory reference to be loaded
into a virtual register.
Patch by Sriram Murali.
Added:
llvm/trunk/test/CodeGen/X86/atom-call-reg-indirect.ll
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=178171&r1=178170&r2=178171&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Wed Mar 27 14:14:02 2013
@@ -134,6 +134,9 @@ def FeatureSlowDivide : SubtargetFeature
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+ "CallRegIndirect", "true",
+ "Call register indirect">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -181,7 +184,9 @@ def : ProcessorModel<"penryn", SandyBrid
def : ProcessorModel<"atom", AtomModel,
[ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide, FeaturePadShortFunctions]>;
+ FeatureSlowDivide,
+ FeatureCallRegIndirect,
+ FeaturePadShortFunctions]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=178171&r1=178170&r2=178171&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Mar 27 14:14:02 2013
@@ -2629,6 +2629,19 @@ X86TargetLowering::LowerCall(TargetLower
InFlag = Chain.getValue(1);
}
+ // Use indirect reference through register, when CALL uses a memory reference.
+ if (Subtarget->callRegIndirect() &&
+ Callee.getOpcode() == ISD::LOAD) {
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VReg = MRI.createVirtualRegister(AddrRegClass);
+ SDValue tempValue = DAG.getCopyFromReg(Callee,
+ dl, VReg, Callee.getValueType());
+ Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
Ops.push_back(Chain);
Ops.push_back(Callee);
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=178171&r1=178170&r2=178171&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Wed Mar 27 14:14:02 2013
@@ -458,6 +458,7 @@ void X86Subtarget::initializeEnvironment
HasSlowDivide = false;
PostRAScheduler = false;
PadShortFunctions = false;
+ CallRegIndirect = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=178171&r1=178170&r2=178171&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Wed Mar 27 14:14:02 2013
@@ -159,6 +159,10 @@ protected:
/// a stall when returning too early.
bool PadShortFunctions;
+ /// CallRegIndirect - True if calls through a memory reference should be
+ /// converted to register-based indirect calls.
+ bool CallRegIndirect;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -269,6 +273,7 @@ public:
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
+ bool callRegIndirect() const { return CallRegIndirect; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
Added: llvm/trunk/test/CodeGen/X86/atom-call-reg-indirect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atom-call-reg-indirect.ll?rev=178171&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atom-call-reg-indirect.ll (added)
+++ llvm/trunk/test/CodeGen/X86/atom-call-reg-indirect.ll Wed Mar 27 14:14:02 2013
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+
+
+; fn_ptr.ll
+%class.A = type { i32 (...)** }
+
+define i32 @test1() #0 {
+ ;ATOM: test1
+entry:
+ %call = tail call %class.A* @_Z3facv()
+ %0 = bitcast %class.A* %call to void (%class.A*)***
+ %vtable = load void (%class.A*)*** %0, align 8
+ %1 = load void (%class.A*)** %vtable, align 8
+ ;ATOM32: movl (%ecx), %ecx
+ ;ATOM32: calll *%ecx
+ ;ATOM-NOT32: calll *(%ecx)
+ ;ATOM64: movq (%rcx), %rcx
+ ;ATOM64: callq *%rcx
+ ;ATOM-NOT64: callq *(%rcx)
+ tail call void %1(%class.A* %call)
+ ret i32 0
+}
+
+declare %class.A* @_Z3facv() #1
+
+; virt_fn.ll
+@p = external global void (i32)**
+
+define i32 @test2() #0 {
+ ;ATOM: test2
+entry:
+ %0 = load void (i32)*** @p, align 8
+ %1 = load void (i32)** %0, align 8
+ ;ATOM32: movl (%eax), %eax
+ ;ATOM32: calll *%eax
+ ;ATOM-NOT32: calll *(%eax)
+ ;ATOM64: movq (%rax), %rax
+ ;ATOM64: callq *%rax
+ ;ATOM-NOT64: callq *(%rax)
+ tail call void %1(i32 2)
+ ret i32 0
+}
More information about the llvm-commits
mailing list