[llvm] 8ae2a18 - [X86] Use proxy scheduler models for bdver3/bdver4 cpus (#114873)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 04:33:07 PST 2024


Author: Simon Pilgrim
Date: 2024-11-13T12:33:04Z
New Revision: 8ae2a18736c15e0d0d9d0893b21bce4f3bf581c9

URL: https://github.com/llvm/llvm-project/commit/8ae2a18736c15e0d0d9d0893b21bce4f3bf581c9
DIFF: https://github.com/llvm/llvm-project/commit/8ae2a18736c15e0d0d9d0893b21bce4f3bf581c9.diff

LOG: [X86] Use proxy scheduler models for bdver3/bdver4 cpus (#114873)

We don't have specific models for bdver3/bdver4 CPUs, but we can use the
bdver2/znver1 models as proxy stand-ins - these days the models are more
useful for analysis than for perfect instruction scheduling, so these
should be fine.

While they don't accurately represent the bdver3/bdver4 architecture
(specifically the different fp-pipe layout), they give more accurate
latencies/throughputs (vs Agner) than the default SandyBridge model, and
enable PostRA scheduling, which all recent AMD models have benefited
from.

I had to use the znver1 model for bdver4 so that we have AVX2
instruction coverage (none of the TBM/XOP/LWP/FMA4 instructions have
explicit schedules, so this shouldn't be a problem) - bdver4 and znver1
both double-pump 256-bit instructions, so this works pretty well.

This patch is based on a discussion at the devmtg regarding how easily
we can provide an actual scheduler model (or at least an approximation)
for more of the X86 CPU targets - we can then add specific models if the
(unlikely) need arises.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86.td
    llvm/test/CodeGen/X86/lwp-intrinsics.ll
    llvm/test/CodeGen/X86/rotate_vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 509632183dc016..75224d5b26e33f 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1912,11 +1912,13 @@ def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
 def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
                 ProcessorFeatures.BdVer2Tuning>;
 // Steamroller
-def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
-           ProcessorFeatures.BdVer3Tuning>;
+// NOTE: BdVer2Model is only an approx model for Steamroller.
+def : ProcModel<"bdver3", BdVer2Model, ProcessorFeatures.BdVer3Features,
+                ProcessorFeatures.BdVer3Tuning>;
 // Excavator
-def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
-           ProcessorFeatures.BdVer4Tuning>;
+// NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
+def : ProcModel<"bdver4", Znver1Model, ProcessorFeatures.BdVer4Features,
+                ProcessorFeatures.BdVer4Tuning>;
 
 def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
                 ProcessorFeatures.ZNTuning>;

diff  --git a/llvm/test/CodeGen/X86/lwp-intrinsics.ll b/llvm/test/CodeGen/X86/lwp-intrinsics.ll
index d3ce7f5dbc4d66..6f32b09c838f08 100644
--- a/llvm/test/CodeGen/X86/lwp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/lwp-intrinsics.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+lwp | FileCheck %s --check-prefixes=X86,X86_LWP
-; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefixes=X86,X86_BDVER1
-; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=X86,X86_BDVER2
-; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefixes=X86,X86_BDVER3
-; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=X86,X86_BDVER4
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefixes=X86,X86_BDVER
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=X86,X86_BDVER
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefixes=X86,X86_BDVER
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=X86,X86_BDVER
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64
@@ -49,41 +49,14 @@ define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind {
 ; X86_LWP-NEXT:    setb %al
 ; X86_LWP-NEXT:    retl
 ;
-; X86_BDVER1-LABEL: test_lwpins32_rri:
-; X86_BDVER1:       # %bb.0:
-; X86_BDVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER1-NEXT:    addl %ecx, %ecx
-; X86_BDVER1-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
-; X86_BDVER1-NEXT:    setb %al
-; X86_BDVER1-NEXT:    retl
-;
-; X86_BDVER2-LABEL: test_lwpins32_rri:
-; X86_BDVER2:       # %bb.0:
-; X86_BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER2-NEXT:    addl %ecx, %ecx
-; X86_BDVER2-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
-; X86_BDVER2-NEXT:    setb %al
-; X86_BDVER2-NEXT:    retl
-;
-; X86_BDVER3-LABEL: test_lwpins32_rri:
-; X86_BDVER3:       # %bb.0:
-; X86_BDVER3-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER3-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER3-NEXT:    addl %ecx, %ecx
-; X86_BDVER3-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
-; X86_BDVER3-NEXT:    setb %al
-; X86_BDVER3-NEXT:    retl
-;
-; X86_BDVER4-LABEL: test_lwpins32_rri:
-; X86_BDVER4:       # %bb.0:
-; X86_BDVER4-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER4-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER4-NEXT:    addl %ecx, %ecx
-; X86_BDVER4-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
-; X86_BDVER4-NEXT:    setb %al
-; X86_BDVER4-NEXT:    retl
+; X86_BDVER-LABEL: test_lwpins32_rri:
+; X86_BDVER:       # %bb.0:
+; X86_BDVER-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86_BDVER-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86_BDVER-NEXT:    addl %ecx, %ecx
+; X86_BDVER-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
+; X86_BDVER-NEXT:    setb %al
+; X86_BDVER-NEXT:    retl
 ;
 ; X64-LABEL: test_lwpins32_rri:
 ; X64:       # %bb.0:
@@ -124,37 +97,13 @@ define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind {
 ; X86_LWP-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
 ; X86_LWP-NEXT:    retl
 ;
-; X86_BDVER1-LABEL: test_lwpval32_rri:
-; X86_BDVER1:       # %bb.0:
-; X86_BDVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER1-NEXT:    addl %ecx, %ecx
-; X86_BDVER1-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
-; X86_BDVER1-NEXT:    retl
-;
-; X86_BDVER2-LABEL: test_lwpval32_rri:
-; X86_BDVER2:       # %bb.0:
-; X86_BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER2-NEXT:    addl %ecx, %ecx
-; X86_BDVER2-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
-; X86_BDVER2-NEXT:    retl
-;
-; X86_BDVER3-LABEL: test_lwpval32_rri:
-; X86_BDVER3:       # %bb.0:
-; X86_BDVER3-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER3-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER3-NEXT:    addl %ecx, %ecx
-; X86_BDVER3-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
-; X86_BDVER3-NEXT:    retl
-;
-; X86_BDVER4-LABEL: test_lwpval32_rri:
-; X86_BDVER4:       # %bb.0:
-; X86_BDVER4-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86_BDVER4-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86_BDVER4-NEXT:    addl %ecx, %ecx
-; X86_BDVER4-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
-; X86_BDVER4-NEXT:    retl
+; X86_BDVER-LABEL: test_lwpval32_rri:
+; X86_BDVER:       # %bb.0:
+; X86_BDVER-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86_BDVER-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86_BDVER-NEXT:    addl %ecx, %ecx
+; X86_BDVER-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
+; X86_BDVER-NEXT:    retl
 ;
 ; X64-LABEL: test_lwpval32_rri:
 ; X64:       # %bb.0:

diff  --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
index 11d62c307a1ddf..a5349cb33193ff 100644
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -162,21 +162,13 @@ define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
 }
 
 define <8 x i16> @or_fshl_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; XOPAVX1-LABEL: or_fshl_v8i16:
-; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vpor %xmm0, %xmm1, %xmm1
-; XOPAVX1-NEXT:    vpsrlw $11, %xmm0, %xmm0
-; XOPAVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; XOPAVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; XOPAVX1-NEXT:    retq
-;
-; XOPAVX2-LABEL: or_fshl_v8i16:
-; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpor %xmm0, %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpsllw $5, %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpsrlw $11, %xmm0, %xmm0
-; XOPAVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; XOPAVX2-NEXT:    retq
+; XOP-LABEL: or_fshl_v8i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm1
+; XOP-NEXT:    vpsrlw $11, %xmm0, %xmm0
+; XOP-NEXT:    vpsllw $5, %xmm1, %xmm1
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: or_fshl_v8i16:
 ; AVX512:       # %bb.0:
@@ -193,21 +185,13 @@ define <8 x i16> @or_fshl_v8i16(<8 x i16> %x, <8 x i16> %y) {
 }
 
 define <4 x i32> @or_fshl_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; XOPAVX1-LABEL: or_fshl_v4i32:
-; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vpor %xmm0, %xmm1, %xmm1
-; XOPAVX1-NEXT:    vpsrld $11, %xmm0, %xmm0
-; XOPAVX1-NEXT:    vpslld $21, %xmm1, %xmm1
-; XOPAVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; XOPAVX1-NEXT:    retq
-;
-; XOPAVX2-LABEL: or_fshl_v4i32:
-; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpor %xmm0, %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpslld $21, %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpsrld $11, %xmm0, %xmm0
-; XOPAVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; XOPAVX2-NEXT:    retq
+; XOP-LABEL: or_fshl_v4i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm1
+; XOP-NEXT:    vpsrld $11, %xmm0, %xmm0
+; XOP-NEXT:    vpslld $21, %xmm1, %xmm1
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: or_fshl_v4i32:
 ; AVX512:       # %bb.0:


        


More information about the llvm-commits mailing list