[llvm-branch-commits] [llvm] release/20.x: [AArch64] Enable AvoidLDAPUR for cpu=generic between armv8.4 and armv9.3. (#125261) (PR #126253)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 7 07:34:39 PST 2025
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126253
Backport 6424abcd6c9c6aa8171c79d0fe0369d3a10da3d5
Requested by: @davemgreen
From 521366d7d3d97b8c61e6c2a868c20149ef80263a Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 7 Feb 2025 10:16:57 +0000
Subject: [PATCH] [AArch64] Enable AvoidLDAPUR for cpu=generic between armv8.4
and armv9.3. (#125261)
As added in #124274, CPUs in this range can suffer from performance
issues with ldapur. As the gain from ldar->ldapr is expected to be
greater than the minor gain from ldapr->ldapur, this opts to avoid the
instruction under the default -mcpu=generic when the -march is less than
armv8.8 / armv9.3.
I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to be
clearer what it means.
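For reference, a minimal standalone example in the style of the updated
test shows the behaviour being tuned. The function name, llc invocations
and expected instruction sequences here are illustrative only; they are
not check lines taken from the test:

; Acquire load at a constant offset. With FEAT_LRCPC2 (rcpc-immo) this can
; lower to ldapur with an immediate offset; with the new generic tuning
; (armv8.4 up to armv8.7/armv9.2) the offset is expected to be added to the
; base first and ldapr used instead. Exact registers depend on the compiler.
;   llc example.ll -o - -mtriple=aarch64 -mattr=+v8.4a,+rcpc-immo -O1
;   llc example.ll -o - -mtriple=aarch64 -mattr=+v8.8a -O1
define i32 @load_acquire_offset(ptr %p) {
  %gep = getelementptr inbounds i32, ptr %p, i32 4
  %v = load atomic i32, ptr %gep acquire, align 4
  ret i32 %v
}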
(cherry picked from commit 6424abcd6c9c6aa8171c79d0fe0369d3a10da3d5)
---
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 7 ++++++-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 ++--
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
.../AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll | 7 +++++--
4 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index bc921f07e1dbf89..809e658e6538017 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -125,7 +125,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
- case Others:
+ case Generic:
+ // Using TuneCPU=generic we avoid ldapur instructions to line up with the
+ // cpus that use the AvoidLDAPUR feature. We don't want this to be on
+ // forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
+ if (hasV8_4aOps() && !hasV8_8aOps())
+ AvoidLDAPUR = true;
break;
case Carmel:
CacheLineSize = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d22991224d496d9..dca5f5393fe47b3 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -38,7 +38,7 @@ class Triple;
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
- Others,
+ Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
@@ -46,7 +46,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
- ARMProcFamilyEnum ARMProcFamily = Others;
+ ARMProcFamilyEnum ARMProcFamily = Generic;
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4af3c482e65984b..cd994d53a60088f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4272,7 +4272,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by
// checking for that case, we can ensure that the default behaviour is
// unchanged
- if (ST->getProcFamily() != AArch64Subtarget::Others &&
+ if (ST->getProcFamily() != AArch64Subtarget::Generic &&
!ST->getSchedModel().isOutOfOrder()) {
UP.Runtime = true;
UP.Partial = true;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index b475e68db411a42..02ff12c27fcda9d 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -1,12 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "(?!^\s*lda.*\bsp\b)^\s*.*\bsp\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo,avoid-ldapur -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v2 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered: