[llvm] r267809 - [X86] Enable the post-RA-scheduler for clang's default 32-bit cpu.

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Mon May 9 09:31:55 PDT 2016


This seems to have caused PR27681 <http://llvm.org/PR27681>

Mitch, can you please take a look?

On Wed, Apr 27, 2016 at 3:52 PM, Mitch Bodart via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: mbodart
> Date: Wed Apr 27 17:52:35 2016
> New Revision: 267809
>
> URL: http://llvm.org/viewvc/llvm-project?rev=267809&view=rev
> Log:
> [X86] Enable the post-RA-scheduler for clang's default 32-bit cpu.
>
> For compilations with no explicit cpu specified, this exhibits
> nice gains on Silvermont, with neutral performance on big cores.
>
> Differential Revision: http://reviews.llvm.org/D19138
>
> Added:
>     llvm/trunk/test/CodeGen/X86/post-ra-sched.ll
> Modified:
>     llvm/trunk/lib/Target/X86/X86.td
>     llvm/trunk/lib/Target/X86/X86Schedule.td
>
> Modified: llvm/trunk/lib/Target/X86/X86.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=267809&r1=267808&r2=267809&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86.td (original)
> +++ llvm/trunk/lib/Target/X86/X86.td Wed Apr 27 17:52:35 2016
> @@ -276,12 +276,28 @@ def : Proc<"pentium3",        [FeatureX8
>                                 FeatureSSE1, FeatureFXSR]>;
>  def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
>                                 FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
> -def : Proc<"pentium-m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> -                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
> -def : Proc<"pentium4",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> -                               FeatureSSE2, FeatureFXSR]>;
> -def : Proc<"pentium4m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> -                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
> +
> +// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
> +// The intent is to enable it for pentium4 which is the current default
> +// processor in a vanilla 32-bit clang compilation when no specific
> +// architecture is specified.  This generally gives a nice performance
> +// increase on silvermont, with largely neutral behavior on other
> +// contemporary large core processors.
> +// pentium-m, pentium4m, prescott and nocona are included as a preventative
> +// measure to avoid performance surprises, in case clang's default cpu
> +// changes slightly.
> +
> +def : ProcessorModel<"pentium-m", GenericPostRAModel,
> +                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> +                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
> +
> +def : ProcessorModel<"pentium4", GenericPostRAModel,
> +                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> +                      FeatureSSE2, FeatureFXSR]>;
> +
> +def : ProcessorModel<"pentium4m", GenericPostRAModel,
> +                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
> +                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
>
>  // Intel Quark.
>  def : Proc<"lakemont",        []>;
> @@ -292,10 +308,10 @@ def : ProcessorModel<"yonah", SandyBridg
>                        FeatureFXSR, FeatureSlowBTMem]>;
>
>  // NetBurst.
> -def : Proc<"prescott",
> -           [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
> -            FeatureFXSR, FeatureSlowBTMem]>;
> -def : Proc<"nocona", [
> +def : ProcessorModel<"prescott", GenericPostRAModel,
> +                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
> +                      FeatureFXSR, FeatureSlowBTMem]>;
> +def : ProcessorModel<"nocona", GenericPostRAModel, [
>    FeatureX87,
>    FeatureSlowUAMem16,
>    FeatureMMX,
>
> Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=267809&r1=267808&r2=267809&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
> +++ llvm/trunk/lib/Target/X86/X86Schedule.td Wed Apr 27 17:52:35 2016
> @@ -633,8 +633,9 @@ def IIC_NOP : InstrItinClass;
>  // latencies. Since these latencies are not used for pipeline hazards,
>  // they do not need to be exact.
>  //
> -// The GenericModel contains no instruction itineraries.
> -def GenericModel : SchedMachineModel {
> +// The GenericX86Model contains no instruction itineraries
> +// and disables PostRAScheduler.
> +class GenericX86Model : SchedMachineModel {
>    let IssueWidth = 4;
>    let MicroOpBufferSize = 32;
>    let LoadLatency = 4;
> @@ -643,6 +644,13 @@ def GenericModel : SchedMachineModel {
>    let CompleteModel = 0;
>  }
>
> +def GenericModel : GenericX86Model;
> +
> +// Define a model with the PostRAScheduler enabled.
> +def GenericPostRAModel : GenericX86Model {
> +  let PostRAScheduler = 1;
> +}
> +
>  include "X86ScheduleAtom.td"
>  include "X86SchedSandyBridge.td"
>  include "X86SchedHaswell.td"
>
> Added: llvm/trunk/test/CodeGen/X86/post-ra-sched.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/post-ra-sched.ll?rev=267809&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/post-ra-sched.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/post-ra-sched.ll Wed Apr 27 17:52:35 2016
> @@ -0,0 +1,40 @@
> +; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s
> +; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s
> +; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s
> +; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s
> +; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s
> +;
> +; Verify that scheduling puts some distance between a load feeding into
> +; the address of another load, and that second load.  This currently
> +; happens during the post-RA-scheduler, which should be enabled by
> +; default with the above specified cpus.
> +
> +@ptrs = external global [0 x i32*], align 4
> +@idxa = common global i32 0, align 4
> +@idxb = common global i32 0, align 4
> +@res = common global i32 0, align 4
> +
> +define void @addindirect() {
> +; CHECK-LABEL: addindirect:
> +; CHECK:       # BB#0: # %entry
> +; CHECK-NEXT:    movl idxb, %ecx
> +; CHECK-NEXT:    movl idxa, %eax
> +; CHECK-NEXT:    movl ptrs(,%ecx,4), %ecx
> +; CHECK-NEXT:    movl ptrs(,%eax,4), %eax
> +; CHECK-NEXT:    movl (%ecx), %ecx
> +; CHECK-NEXT:    addl (%eax), %ecx
> +; CHECK-NEXT:    movl %ecx, res
> +; CHECK-NEXT:    retl
> +entry:
> +  %0 = load i32, i32* @idxa, align 4
> +  %arrayidx = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %0
> +  %1 = load i32*, i32** %arrayidx, align 4
> +  %2 = load i32, i32* %1, align 4
> +  %3 = load i32, i32* @idxb, align 4
> +  %arrayidx1 = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %3
> +  %4 = load i32*, i32** %arrayidx1, align 4
> +  %5 = load i32, i32* %4, align 4
> +  %add = add i32 %5, %2
> +  store i32 %add, i32* @res, align 4
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160509/30d5eb80/attachment-0001.html>


More information about the llvm-commits mailing list