[llvm] [AArch64] Enable AvoidLDAPUR for cpu=generic between armv9.0 and armv9.3. (PR #125261)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 09:51:45 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
As added in #<!-- -->124274, CPUs in this range can suffer from performance issues with ldapur. As the gain from ldar->ldapr is expected to be greater than the minor gain from ldapr->ldapur, this opts to avoid the instruction under the default -mcpu=generic when the -march is less that armv9.3.
I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to be clearer what it means.
---
Full diff: https://github.com/llvm/llvm-project/pull/125261.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+6-1)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+2-2)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+1-1)
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll (+2)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index bc921f07e1dbf8..89e3e64e984421 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -125,7 +125,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
- case Others:
+ case Generic:
+ // Using TuneCPU=generic we avoid ldapur instructions to line up with the
+ // cpus that use the AvoidLDAPUR feature. We don't want this to be on
+ // forever, so it is enabled between armv9.0 and armv9.2.
+ if (hasV9_0aOps() && !hasV9_3aOps())
+ AvoidLDAPUR = true;
break;
case Carmel:
CacheLineSize = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d22991224d496d..dca5f5393fe47b 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -38,7 +38,7 @@ class Triple;
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
- Others,
+ Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
@@ -46,7 +46,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
- ARMProcFamilyEnum ARMProcFamily = Others;
+ ARMProcFamilyEnum ARMProcFamily = Generic;
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..81f920e2c788b1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4272,7 +4272,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by
// checking for that case, we can ensure that the default behaviour is
// unchanged
- if (ST->getProcFamily() != AArch64Subtarget::Others &&
+ if (ST->getProcFamily() != AArch64Subtarget::Generic &&
!ST->getSchedModel().isOutOfOrder()) {
UP.Runtime = true;
UP.Partial = true;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index b475e68db411a4..0cdd08535884e9 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -7,6 +7,8 @@
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered:
``````````
</details>
https://github.com/llvm/llvm-project/pull/125261
More information about the llvm-commits
mailing list