[llvm] e89b4bc - [X86] Remove SlowDivide tuning from GRTTuning (#84676)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 11 22:41:53 PDT 2024
Author: Phoebe Wang
Date: 2024-03-12T13:41:49+08:00
New Revision: e89b4bcf32b8f6ddce9d7e95659e9f092a55c021
URL: https://github.com/llvm/llvm-project/commit/e89b4bcf32b8f6ddce9d7e95659e9f092a55c021
DIFF: https://github.com/llvm/llvm-project/commit/e89b4bcf32b8f6ddce9d7e95659e9f092a55c021.diff
LOG: [X86] Remove SlowDivide tuning from GRTTuning (#84676)
DIV32/64 throughput has been improved in the Atom architecture since
Goldmont, and Alder Lake-E shows similar numbers. We therefore shouldn't
add the SlowDivide tunings to Gracemont and later products.
Verified against Agner Fog's instruction tables and uops.info.
Added:
Modified:
llvm/lib/Target/X86/X86.td
llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2a65ce75d6b9a..8367f938c0ddfa 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1237,8 +1237,6 @@ def ProcessorFeatures {
// Gracemont
list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
TuningSlow3OpsLEA,
- TuningSlowDivide32,
- TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFast15ByteNOP,
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
index 8369a44dcbad2d..afecf00113a0a6 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -4,6 +4,8 @@
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=gracemont < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=HUGEWS %s
; Verify that div32 is bypassed only for Atoms.
@@ -117,6 +119,13 @@ define i64 @div64(i64 %a, i64 %b) {
; SKL-NEXT: # kill: def $eax killed $eax def $rax
; SKL-NEXT: retq
;
+; GMT-LABEL: div64:
+; GMT: # %bb.0: # %entry
+; GMT-NEXT: movq %rdi, %rax
+; GMT-NEXT: cqto
+; GMT-NEXT: idivq %rsi
+; GMT-NEXT: retq
+;
; HUGEWS-LABEL: div64:
; HUGEWS: # %bb.0: # %entry
; HUGEWS-NEXT: movq %rdi, %rax
@@ -240,6 +249,13 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
; SKL-NEXT: # kill: def $eax killed $eax def $rax
; SKL-NEXT: retq
;
+; GMT-LABEL: div64_hugews:
+; GMT: # %bb.0:
+; GMT-NEXT: movq %rdi, %rax
+; GMT-NEXT: cqto
+; GMT-NEXT: idivq %rsi
+; GMT-NEXT: retq
+;
; HUGEWS-LABEL: div64_hugews:
; HUGEWS: # %bb.0:
; HUGEWS-NEXT: movq %rdi, %rax
More information about the llvm-commits mailing list