[llvm] [AArch64] Enable AvoidLDAPUR for cpu=generic between armv9.0 and armv9.3. (PR #125261)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 09:51:13 PST 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/125261
As added in #124274, CPUs in this range can suffer from performance issues with ldapur. As the gain from ldar->ldapr is expected to be greater than the minor gain from ldapr->ldapur, this opts to avoid the instruction under the default -mcpu=generic when the -march is less than armv9.3.
I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to be clearer what it means.
>From c82fa16ab07689cb4f47ef01ced16bac5981f7e6 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 31 Jan 2025 17:45:42 +0000
Subject: [PATCH] [AArch64] Enable AvoidLDAPUR for cpu=generic between armv9.0
and armv9.3.
As added in #124274, CPUs in this range can suffer from performance issues with
ldapur. As the gain from ldar->ldapr is expected to be greater than the minor
gain from ldapr->ldapur, this opts to avoid the instruction under the default
-mcpu=generic when the -march is less than armv9.3.
---
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 7 ++++++-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 ++--
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
.../AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll | 2 ++
4 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index bc921f07e1dbf8..89e3e64e984421 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -125,7 +125,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
- case Others:
+ case Generic:
+ // Using TuneCPU=generic we avoid ldapur instructions to line up with the
+ // cpus that use the AvoidLDAPUR feature. We don't want this to be on
+ // forever, so it is enabled between armv9.0 and armv9.2.
+ if (hasV9_0aOps() && !hasV9_3aOps())
+ AvoidLDAPUR = true;
break;
case Carmel:
CacheLineSize = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d22991224d496d..dca5f5393fe47b 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -38,7 +38,7 @@ class Triple;
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
- Others,
+ Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
@@ -46,7 +46,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
- ARMProcFamilyEnum ARMProcFamily = Others;
+ ARMProcFamilyEnum ARMProcFamily = Generic;
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..81f920e2c788b1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4272,7 +4272,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by
// checking for that case, we can ensure that the default behaviour is
// unchanged
- if (ST->getProcFamily() != AArch64Subtarget::Others &&
+ if (ST->getProcFamily() != AArch64Subtarget::Generic &&
!ST->getSchedModel().isOutOfOrder()) {
UP.Runtime = true;
UP.Partial = true;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index b475e68db411a4..0cdd08535884e9 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -7,6 +7,8 @@
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered:
More information about the llvm-commits
mailing list