<div dir="ltr">This seems to have caused PR27681 &lt;<a href="http://llvm.org/PR27681">http://llvm.org/PR27681</a>&gt;<div><br></div><div>Mitch, can you please take a look?</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Apr 27, 2016 at 3:52 PM, Mitch Bodart via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: mbodart<br>

Date: Wed Apr 27 17:52:35 2016<br>

New Revision: 267809<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=267809&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=267809&view=rev</a><br>

Log:<br>

[X86] Enable the post-RA-scheduler for clang's default 32-bit cpu.<br>

<br>

For compilations with no explicit cpu specified, this exhibits<br>

nice gains on Silvermont, with neutral performance on big cores.<br>

<br>

Differential Revision: <a href="http://reviews.llvm.org/D19138" rel="noreferrer" target="_blank">http://reviews.llvm.org/D19138</a><br>

<br>

Added:<br>

    llvm/trunk/test/CodeGen/X86/post-ra-sched.ll<br>

Modified:<br>

    llvm/trunk/lib/Target/X86/X86.td<br>

    llvm/trunk/lib/Target/X86/X86Schedule.td<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86.td<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=267809&r1=267808&r2=267809&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=267809&r1=267808&r2=267809&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86.td (original)<br>

+++ llvm/trunk/lib/Target/X86/X86.td Wed Apr 27 17:52:35 2016<br>

@@ -276,12 +276,28 @@ def : Proc<"pentium3",        [FeatureX8<br>

                                FeatureSSE1, FeatureFXSR]>;<br>

 def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

                                FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;<br>

-def : Proc<"pentium-m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

-                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;<br>

-def : Proc<"pentium4",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

-                               FeatureSSE2, FeatureFXSR]>;<br>

-def : Proc<"pentium4m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

-                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;<br>

+<br>

+// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.<br>

+// The intent is to enable it for pentium4 which is the current default<br>

+// processor in a vanilla 32-bit clang compilation when no specific<br>

+// architecture is specified.  This generally gives a nice performance<br>

+// increase on silvermont, with largely neutral behavior on other<br>

+// contemporary large core processors.<br>

+// pentium-m, pentium4m, prescott and nocona are included as a preventative<br>

+// measure to avoid performance surprises, in case clang's default cpu<br>

+// changes slightly.<br>

+<br>

+def : ProcessorModel<"pentium-m", GenericPostRAModel,<br>

+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

+                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;<br>

+<br>

+def : ProcessorModel<"pentium4", GenericPostRAModel,<br>

+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

+                      FeatureSSE2, FeatureFXSR]>;<br>

+<br>

+def : ProcessorModel<"pentium4m", GenericPostRAModel,<br>

+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,<br>

+                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;<br>

<br>

 // Intel Quark.<br>

 def : Proc<"lakemont",        []>;<br>

@@ -292,10 +308,10 @@ def : ProcessorModel<"yonah", SandyBridg<br>

                       FeatureFXSR, FeatureSlowBTMem]>;<br>

<br>

 // NetBurst.<br>

-def : Proc<"prescott",<br>

-           [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,<br>

-            FeatureFXSR, FeatureSlowBTMem]>;<br>

-def : Proc<"nocona", [<br>

+def : ProcessorModel<"prescott", GenericPostRAModel,<br>

+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,<br>

+                      FeatureFXSR, FeatureSlowBTMem]>;<br>

+def : ProcessorModel<"nocona", GenericPostRAModel, [<br>

   FeatureX87,<br>

   FeatureSlowUAMem16,<br>

   FeatureMMX,<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=267809&r1=267808&r2=267809&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=267809&r1=267808&r2=267809&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)<br>

+++ llvm/trunk/lib/Target/X86/X86Schedule.td Wed Apr 27 17:52:35 2016<br>

@@ -633,8 +633,9 @@ def IIC_NOP : InstrItinClass;<br>

 // latencies. Since these latencies are not used for pipeline hazards,<br>

 // they do not need to be exact.<br>

 //<br>

-// The GenericModel contains no instruction itineraries.<br>

-def GenericModel : SchedMachineModel {<br>

+// The GenericX86Model contains no instruction itineraries<br>

+// and disables PostRAScheduler.<br>

+class GenericX86Model : SchedMachineModel {<br>

   let IssueWidth = 4;<br>

   let MicroOpBufferSize = 32;<br>

   let LoadLatency = 4;<br>

@@ -643,6 +644,13 @@ def GenericModel : SchedMachineModel {<br>

   let CompleteModel = 0;<br>

 }<br>

<br>

+def GenericModel : GenericX86Model;<br>

+<br>

+// Define a model with the PostRAScheduler enabled.<br>

+def GenericPostRAModel : GenericX86Model {<br>

+  let PostRAScheduler = 1;<br>

+}<br>

+<br>

 include "X86ScheduleAtom.td"<br>

 include "X86SchedSandyBridge.td"<br>

 include "X86SchedHaswell.td"<br>

<br>

Added: llvm/trunk/test/CodeGen/X86/post-ra-sched.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/post-ra-sched.ll?rev=267809&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/post-ra-sched.ll?rev=267809&view=auto</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/post-ra-sched.ll (added)<br>

+++ llvm/trunk/test/CodeGen/X86/post-ra-sched.ll Wed Apr 27 17:52:35 2016<br>

@@ -0,0 +1,40 @@<br>

+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s<br>

+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s<br>

+; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s<br>

+; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s<br>

+; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s<br>

+;<br>

+; Verify that scheduling puts some distance between a load feeding into<br>

+; the address of another load, and that second load.  This currently<br>

+; happens during the post-RA-scheduler, which should be enabled by<br>

+; default with the above specified cpus.<br>

+<br>

+@ptrs = external global [0 x i32*], align 4<br>

+@idxa = common global i32 0, align 4<br>

+@idxb = common global i32 0, align 4<br>

+@res = common global i32 0, align 4<br>

+<br>

+define void @addindirect() {<br>

+; CHECK-LABEL: addindirect:<br>

+; CHECK:       # BB#0: # %entry<br>

+; CHECK-NEXT:    movl idxb, %ecx<br>

+; CHECK-NEXT:    movl idxa, %eax<br>

+; CHECK-NEXT:    movl ptrs(,%ecx,4), %ecx<br>

+; CHECK-NEXT:    movl ptrs(,%eax,4), %eax<br>

+; CHECK-NEXT:    movl (%ecx), %ecx<br>

+; CHECK-NEXT:    addl (%eax), %ecx<br>

+; CHECK-NEXT:    movl %ecx, res<br>

+; CHECK-NEXT:    retl<br>

+entry:<br>

+  %0 = load i32, i32* @idxa, align 4<br>

+  %arrayidx = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %0<br>

+  %1 = load i32*, i32** %arrayidx, align 4<br>

+  %2 = load i32, i32* %1, align 4<br>

+  %3 = load i32, i32* @idxb, align 4<br>

+  %arrayidx1 = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %3<br>

+  %4 = load i32*, i32** %arrayidx1, align 4<br>

+  %5 = load i32, i32* %4, align 4<br>

+  %add = add i32 %5, %2<br>

+  store i32 %add, i32* @res, align 4<br>

+  ret void<br>

+}<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>

</blockquote></div><br></div>