[llvm] e89b4bc - [X86] Remove SlowDivide tuning from GRTTuning (#84676)

Mon Mar 11 22:41:53 PDT 2024

Author: Phoebe Wang
Date: 2024-03-12T13:41:49+08:00
New Revision: e89b4bcf32b8f6ddce9d7e95659e9f092a55c021

URL: https://github.com/llvm/llvm-project/commit/e89b4bcf32b8f6ddce9d7e95659e9f092a55c021
DIFF: https://github.com/llvm/llvm-project/commit/e89b4bcf32b8f6ddce9d7e95659e9f092a55c021.diff

LOG: [X86] Remove SlowDivide tuning from GRTTuning (#84676)

The DIV32/64 throughput has been improved in the Atom architecture since
Goldmont, and Alder Lake-E shows similar numbers. So we shouldn't
add these tunings to Gracemont and later products.

Checked against Agner Fog's tables and uops.info.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86.td
    llvm/test/CodeGen/X86/bypass-slow-division-tune.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2a65ce75d6b9a..8367f938c0ddfa 100644

--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1237,8 +1237,6 @@ def ProcessorFeatures {
   // Gracemont
   list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
                                       TuningSlow3OpsLEA,
-                                      TuningSlowDivide32,
-                                      TuningSlowDivide64,
                                       TuningFastScalarFSQRT,
                                       TuningFastVectorFSQRT,
                                       TuningFast15ByteNOP,

diff  --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
index 8369a44dcbad2d..afecf00113a0a6 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -4,6 +4,8 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64     < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont   < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=gracemont  < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
 ; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
 
 ; Verify that div32 is bypassed only for Atoms.
@@ -117,6 +119,13 @@ define i64 @div64(i64 %a, i64 %b) {
 ; SKL-NEXT:    # kill: def $eax killed $eax def $rax
 ; SKL-NEXT:    retq
 ;
+; GMT-LABEL: div64:
+; GMT:       # %bb.0: # %entry
+; GMT-NEXT:    movq %rdi, %rax
+; GMT-NEXT:    cqto
+; GMT-NEXT:    idivq %rsi
+; GMT-NEXT:    retq
+;
 ; HUGEWS-LABEL: div64:
 ; HUGEWS:       # %bb.0: # %entry
 ; HUGEWS-NEXT:    movq %rdi, %rax
@@ -240,6 +249,13 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
 ; SKL-NEXT:    # kill: def $eax killed $eax def $rax
 ; SKL-NEXT:    retq
 ;
+; GMT-LABEL: div64_hugews:
+; GMT:       # %bb.0:
+; GMT-NEXT:    movq %rdi, %rax
+; GMT-NEXT:    cqto
+; GMT-NEXT:    idivq %rsi
+; GMT-NEXT:    retq
+;
 ; HUGEWS-LABEL: div64_hugews:
 ; HUGEWS:       # %bb.0:
 ; HUGEWS-NEXT:    movq %rdi, %rax