[llvm] 6bba958 - [X86] Change the scheduler model for 'pentium4' to SandyBridgeModel.

Thu Jul 16 22:05:13 PDT 2020

Author: Craig Topper
Date: 2020-07-16T22:04:29-07:00
New Revision: 6bba95831e480656124a5fbcd84f4f2a31e6c0b6

URL: https://github.com/llvm/llvm-project/commit/6bba95831e480656124a5fbcd84f4f2a31e6c0b6
DIFF: https://github.com/llvm/llvm-project/commit/6bba95831e480656124a5fbcd84f4f2a31e6c0b6.diff

LOG: [X86] Change the scheduler model for 'pentium4' to SandyBridgeModel.

I meant to do this in D83913, but missed it while updating the
feature list.

Interestingly I think this is disabling the postRA scheduler. But
it does match our default 64-bit behavior.

Reviewed By: echristo

Differential Revision: https://reviews.llvm.org/D83996

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86.td
    llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
    llvm/test/CodeGen/X86/cmov-fp.ll
    llvm/test/CodeGen/X86/post-ra-sched.ll
    llvm/test/CodeGen/X86/pr34088.ll
    llvm/test/CodeGen/X86/pr40539.ll
    llvm/test/DebugInfo/COFF/fpo-stack-protect.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 10d3007e5839..8ca6dac03675 100644

--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1080,7 +1080,7 @@ foreach P = ["pentium4", "pentium4m"] in {
   // Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
   // give it more modern tunings.
   // FIXME: This wouldn't be needed if we supported mtune.
-  def : ProcessorModel<P, GenericPostRAModel,
+  def : ProcessorModel<P, SandyBridgeModel,
                        [FeatureX87, FeatureCMPXCHG8B,
                         FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
                         FeatureCMOV, FeatureInsertVZEROUPPER,

diff  --git a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
index 380c18fbf5c5..4446f360ec04 100644
--- a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
+++ b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
@@ -1,9 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
 ; RUN: llc < %s -mtriple=i686-pc-windows-msvc | FileCheck %s -check-prefix=X32
 ; Control Flow Guard is currently only available on Windows
 
 
 ; Test that Control Flow Guard checks are correctly added for x86 vector calls.
 define void @func_cf_vector_x86(void (%struct.HVA)* %0, %struct.HVA* %1) #0 {
+; X32-LABEL: func_cf_vector_x86:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $48, %esp
+; X32-NEXT:    movl 8(%ebp), %ecx
+; X32-NEXT:    movl 12(%ebp), %eax
+; X32-NEXT:    movups (%eax), %xmm0
+; X32-NEXT:    movups 16(%eax), %xmm1
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movaps %xmm1, 16(%esp)
+; X32-NEXT:    movsd (%esp), %xmm4
+; X32-NEXT:    movsd 8(%esp), %xmm5
+; X32-NEXT:    movsd 16(%esp), %xmm6
+; X32-NEXT:    movsd 24(%esp), %xmm7
+; X32-NEXT:    calll *___guard_check_icall_fptr
+; X32-NEXT:    movaps %xmm4, %xmm0
+; X32-NEXT:    movaps %xmm5, %xmm1
+; X32-NEXT:    movaps %xmm6, %xmm2
+; X32-NEXT:    movaps %xmm7, %xmm3
+; X32-NEXT:    calll *%ecx
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
 entry:
   %2 = alloca %struct.HVA, align 8
   %3 = bitcast %struct.HVA* %2 to i8*
@@ -13,23 +39,6 @@ entry:
   call x86_vectorcallcc void %0(%struct.HVA inreg %5)
   ret void
 
-  ; X32-LABEL: func_cf_vector_x86
-  ; X32: 	     movl 12(%ebp), %eax
-  ; X32: 	     movl 8(%ebp), %ecx
-  ; X32: 	     movups	(%eax), %xmm0
-  ; X32: 	     movups	16(%eax), %xmm1
-  ; X32: 	     movaps	%xmm0, (%esp)
-  ; X32: 	     movaps	%xmm1, 16(%esp)
-  ; X32: 	     movsd	(%esp), %xmm4
-  ; X32: 	     movsd	8(%esp), %xmm5
-  ; X32: 	     movsd	16(%esp), %xmm6
-  ; X32: 	     movsd	24(%esp), %xmm7
-  ; X32: 	     calll *___guard_check_icall_fptr
-  ; X32: 	     movaps %xmm4, %xmm0
-  ; X32: 	     movaps %xmm5, %xmm1
-  ; X32: 	     movaps %xmm6, %xmm2
-  ; X32: 	     movaps %xmm7, %xmm3
-  ; X32: 	     calll  *%ecx
 }
 attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
 

diff  --git a/llvm/test/CodeGen/X86/cmov-fp.ll b/llvm/test/CodeGen/X86/cmov-fp.ll
index 756324bbdfdc..6bbad427a9b6 100644
--- a/llvm/test/CodeGen/X86/cmov-fp.ll
+++ b/llvm/test/CodeGen/X86/cmov-fp.ll
@@ -1056,11 +1056,11 @@ define float @test16(i32 %a, i32 %b, float %x) nounwind {
 define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test17:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    flds {{\.LCPI.*}}
 ; SSE-NEXT:    fxch %st(1)
-; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fcmovnbe %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1109,11 +1109,11 @@ define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test18:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    flds {{\.LCPI.*}}
 ; SSE-NEXT:    fxch %st(1)
-; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fcmovnb %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1162,11 +1162,11 @@ define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test19:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    flds {{\.LCPI.*}}
 ; SSE-NEXT:    fxch %st(1)
-; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fcmovb %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1215,11 +1215,11 @@ define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test20:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    flds {{\.LCPI.*}}
 ; SSE-NEXT:    fxch %st(1)
-; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fcmovbe %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1268,13 +1268,13 @@ define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test21:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE-NEXT:    flds {{\.LCPI.*}}
-; SSE-NEXT:    fxch %st(1)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    setg %al
 ; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    flds {{\.LCPI.*}}
+; SSE-NEXT:    fxch %st(1)
 ; SSE-NEXT:    fcmovne %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1328,13 +1328,13 @@ define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test22:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE-NEXT:    flds {{\.LCPI.*}}
-; SSE-NEXT:    fxch %st(1)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    setge %al
 ; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    flds {{\.LCPI.*}}
+; SSE-NEXT:    fxch %st(1)
 ; SSE-NEXT:    fcmovne %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1387,13 +1387,13 @@ define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test23:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE-NEXT:    flds {{\.LCPI.*}}
-; SSE-NEXT:    fxch %st(1)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    setl %al
 ; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    flds {{\.LCPI.*}}
+; SSE-NEXT:    fxch %st(1)
 ; SSE-NEXT:    fcmovne %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl
@@ -1446,13 +1446,13 @@ define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind {
 ; SSE-LABEL: test24:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE-NEXT:    flds {{\.LCPI.*}}
-; SSE-NEXT:    fxch %st(1)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; SSE-NEXT:    setle %al
 ; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    flds {{\.LCPI.*}}
+; SSE-NEXT:    fxch %st(1)
 ; SSE-NEXT:    fcmovne %st(1), %st
 ; SSE-NEXT:    fstp %st(1)
 ; SSE-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/post-ra-sched.ll b/llvm/test/CodeGen/X86/post-ra-sched.ll
index f6de77a69883..70882fba5060 100644
--- a/llvm/test/CodeGen/X86/post-ra-sched.ll
+++ b/llvm/test/CodeGen/X86/post-ra-sched.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s --check-prefix=PENTIUM4
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s --check-prefix=PENTIUM4
 ; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s
 ; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s
 ; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s
@@ -9,12 +10,26 @@
 ; happens during the post-RA-scheduler, which should be enabled by
 ; default with the above specified cpus.
 
+; Pentium4 is the default 32-bit CPU on Linux and currently has the postRA
+; scheduler disabled. Leaving the command lines in place in case we change that.
+
 @ptrs = external global [0 x i32*], align 4
 @idxa = common global i32 0, align 4
 @idxb = common global i32 0, align 4
 @res = common global i32 0, align 4
 
 define void @addindirect() {
+; PENTIUM4-LABEL: addindirect:
+; PENTIUM4:       # %bb.0: # %entry
+; PENTIUM4-NEXT:    movl idxa, %eax
+; PENTIUM4-NEXT:    movl ptrs(,%eax,4), %eax
+; PENTIUM4-NEXT:    movl idxb, %ecx
+; PENTIUM4-NEXT:    movl ptrs(,%ecx,4), %ecx
+; PENTIUM4-NEXT:    movl (%ecx), %ecx
+; PENTIUM4-NEXT:    addl (%eax), %ecx
+; PENTIUM4-NEXT:    movl %ecx, res
+; PENTIUM4-NEXT:    retl
+;
 ; CHECK-LABEL: addindirect:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl idxb, %ecx

diff  --git a/llvm/test/CodeGen/X86/pr34088.ll b/llvm/test/CodeGen/X86/pr34088.ll
index 6950e50dd755..a57ff09cc037 100644
--- a/llvm/test/CodeGen/X86/pr34088.ll
+++ b/llvm/test/CodeGen/X86/pr34088.ll
@@ -6,7 +6,7 @@
 %struct.Buffer = type { i8*, i32 }
 
 ; This test checks that the load of store %2 is not dropped.
-; 
+;
 define i32 @pr34088() local_unnamed_addr {
 ; CHECK-LABEL: pr34088:
 ; CHECK:       # %bb.0: # %entry
@@ -18,13 +18,13 @@ define i32 @pr34088() local_unnamed_addr {
 ; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    subl $32, %esp
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movaps %xmm0, (%esp)
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
 ; CHECK-NEXT:    movaps %xmm1, (%esp)
+; CHECK-NEXT:    movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    .cfi_def_cfa %esp, 4

diff  --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll
index f2135cd2e73b..f52fec51203a 100644
--- a/llvm/test/CodeGen/X86/pr40539.ll
+++ b/llvm/test/CodeGen/X86/pr40539.ll
@@ -40,7 +40,6 @@ define zeroext i1 @_Z8test_cosv() {
 ; CHECK-NEXT:    subl $8, %esp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    divss {{\.LCPI.*}}, %xmm0
 ; CHECK-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -49,6 +48,7 @@ define zeroext i1 @_Z8test_cosv() {
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    fstps (%esp)
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm1
 ; CHECK-NEXT:    setae %cl
 ; CHECK-NEXT:    ucomiss {{\.LCPI.*}}, %xmm0

diff  --git a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
index 26fe7c49e7ac..c604234a6055 100644
--- a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
@@ -15,9 +15,9 @@
 ; CHECK:         subl    $20, %esp
 ; CHECK:         .cv_fpo_stackalloc      20
 ; CHECK:         .cv_fpo_endprologue
+; CHECK:         movl    28(%esp), %esi
 ; CHECK:         ___security_cookie
 
-; CHECK:         movl    28(%esp), %esi
 ; CHECK:         movl    %esi, {{[0-9]*}}(%esp)
 ; CHECK:         movl    %esi, {{[0-9]*}}(%esp)
 ; CHECK:         movl    %esi, {{[0-9]*}}(%esp)
@@ -30,7 +30,7 @@
 ; CHECK:         addl    $20, %esp
 ; CHECK:         popl    %esi
 ; CHECK:         retl
-; CHECK: Ltmp3:
+; CHECK: Ltmp2:
 ; CHECK:         .cv_fpo_endproc
 
 ; ModuleID = 't.c'