[llvm] [X86] Remove SlowDivide tuning from GRTTuning (PR #84676)

Mon Mar 11 03:26:46 PDT 2024

https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/84676

>From 083ea9d3760dd3c67f2d70c0e521736ad2b4bf54 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Sun, 10 Mar 2024 21:22:29 +0800
Subject: [PATCH 1/2] [X86] Remove SlowDivide tuning from GRTTuning

The DIV32/64 throughput has been improved since Goldmont in the Atom
architecture, and Alder Lake-E shows similar numbers too. So we shouldn't
apply the SlowDivide tunings to Gracemont and later architectures.
---
 llvm/lib/Target/X86/X86.td | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2a65ce75d6b9a..8367f938c0ddfa 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1237,8 +1237,6 @@ def ProcessorFeatures {
   // Gracemont
   list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
                                       TuningSlow3OpsLEA,
-                                      TuningSlowDivide32,
-                                      TuningSlowDivide64,
                                       TuningFastScalarFSQRT,
                                       TuningFastVectorFSQRT,
                                       TuningFast15ByteNOP,

>From 99889961bc4ab7bb59244365d6ea03c81e0c2fc5 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Mon, 11 Mar 2024 18:24:39 +0800
Subject: [PATCH 2/2] Add coverage for goldmont and gracemont in
 bypass-slow-division-tune.ll

---
 .../CodeGen/X86/bypass-slow-division-tune.ll     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
index 8369a44dcbad2d..afecf00113a0a6 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -4,6 +4,8 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64     < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont   < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=gracemont  < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
 ; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
 
 ; Verify that div32 is bypassed only for Atoms.
@@ -117,6 +119,13 @@ define i64 @div64(i64 %a, i64 %b) {
 ; SKL-NEXT:    # kill: def $eax killed $eax def $rax
 ; SKL-NEXT:    retq
 ;
+; GMT-LABEL: div64:
+; GMT:       # %bb.0: # %entry
+; GMT-NEXT:    movq %rdi, %rax
+; GMT-NEXT:    cqto
+; GMT-NEXT:    idivq %rsi
+; GMT-NEXT:    retq
+;
 ; HUGEWS-LABEL: div64:
 ; HUGEWS:       # %bb.0: # %entry
 ; HUGEWS-NEXT:    movq %rdi, %rax
@@ -240,6 +249,13 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
 ; SKL-NEXT:    # kill: def $eax killed $eax def $rax
 ; SKL-NEXT:    retq
 ;
+; GMT-LABEL: div64_hugews:
+; GMT:       # %bb.0:
+; GMT-NEXT:    movq %rdi, %rax
+; GMT-NEXT:    cqto
+; GMT-NEXT:    idivq %rsi
+; GMT-NEXT:    retq
+;
 ; HUGEWS-LABEL: div64_hugews:
 ; HUGEWS:       # %bb.0:
 ; HUGEWS-NEXT:    movq %rdi, %rax