[llvm] [AArch64] Add FeatureFuseCCSelect to a number of CPU configurations. (PR #153188)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 06:48:24 PDT 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/153188

>From 7771cf743415afeb474b9aa090bfd186b9d8ba6d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Aug 2025 14:48:13 +0100
Subject: [PATCH] [AArch64] Add FeatureFuseCCSelect to a number of CPU
 configurations.

This marks CMP+CSEL as fusible according to the Software Optimization Guides (SWOGs) of
  cortex-a78
  cortex-a710
  cortex-a715
  cortex-a720
  cortex-a725
  cortex-x4
  cortex-x925
  neoverse-n2
  neoverse-n3
  neoverse-v1
  neoverse-v2
  neoverse-v3
---
 llvm/lib/Target/AArch64/AArch64Processors.td  | 16 +++++++
 .../CodeGen/AArch64/misched-fusion-csel.ll    | 43 ++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 1bc1d98a6f65b..c3627b802fe14 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -134,6 +134,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
                                FeatureCmpBccFusion,
                                FeatureFuseAES,
                                FeatureFuseAdrpAdd,
+                               FeatureFuseCCSelect,
                                FeatureAddrLSLSlow14,
                                FeatureALULSLFast,
                                FeaturePostRAScheduler,
@@ -146,6 +147,7 @@ def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily",
                                  FeatureCmpBccFusion,
                                  FeatureFuseAES,
                                  FeatureFuseAdrpAdd,
+                                 FeatureFuseCCSelect,
                                  FeatureAddrLSLSlow14,
                                  FeatureALULSLFast,
                                  FeaturePostRAScheduler,
@@ -158,6 +160,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
                                 FeatureCmpBccFusion,
                                 FeatureFuseAES,
                                 FeatureFuseAdrpAdd,
+                                FeatureFuseCCSelect,
                                 FeatureAddrLSLSlow14,
                                 FeatureALULSLFast,
                                 FeaturePostRAScheduler,
@@ -169,6 +172,7 @@ def TuneA710    : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
                                    FeatureCmpBccFusion,
                                    FeatureFuseAES,
                                    FeatureFuseAdrpAdd,
+                                   FeatureFuseCCSelect,
                                    FeatureALULSLFast,
                                    FeaturePostRAScheduler,
                                    FeatureEnableSelectOptimize,
@@ -181,6 +185,7 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
                                  FeatureCmpBccFusion,
                                  FeatureALULSLFast,
                                  FeatureFuseAdrpAdd,
+                                 FeatureFuseCCSelect,
                                  FeatureEnableSelectOptimize,
                                  FeaturePredictableSelectIsExpensive]>;
 
@@ -191,6 +196,7 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
                                  FeatureCmpBccFusion,
                                  FeatureALULSLFast,
                                  FeatureFuseAdrpAdd,
+                                 FeatureFuseCCSelect,
                                  FeatureEnableSelectOptimize,
                                  FeaturePredictableSelectIsExpensive]>;
 
@@ -201,6 +207,7 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720",
                                  FeatureCmpBccFusion,
                                  FeatureALULSLFast,
                                  FeatureFuseAdrpAdd,
+                                 FeatureFuseCCSelect,
                                  FeatureEnableSelectOptimize,
                                  FeaturePredictableSelectIsExpensive]>;
 
@@ -212,6 +219,7 @@ def TuneA725 : SubtargetFeature<"cortex-a725", "ARMProcFamily",
                                 FeatureCmpBccFusion,
                                 FeatureALULSLFast,
                                 FeatureFuseAdrpAdd,
+                                FeatureFuseCCSelect,
                                 FeatureEnableSelectOptimize,
                                 FeaturePredictableSelectIsExpensive]>;
 
@@ -262,6 +270,7 @@ def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
                               "Cortex-X4 ARM processors", [
                                FeatureALULSLFast,
                                FeatureFuseAdrpAdd,
+                               FeatureFuseCCSelect,
                                FeatureFuseAES,
                                FeaturePostRAScheduler,
                                FeatureEnableSelectOptimize,
@@ -273,6 +282,7 @@ def TuneX925 : SubtargetFeature<"cortex-x925", "ARMProcFamily",
                                 "CortexX925", "Cortex-X925 ARM processors",[
                                 FeatureALULSLFast,
                                 FeatureFuseAdrpAdd,
+                                FeatureFuseCCSelect,
                                 FeatureFuseAES,
                                 FeaturePostRAScheduler,
                                 FeatureEnableSelectOptimize,
@@ -536,6 +546,7 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
                                       "Neoverse N2 ARM processors", [
                                       FeatureFuseAES,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeatureALULSLFast,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
@@ -547,6 +558,7 @@ def TuneNeoverseN3 : SubtargetFeature<"neoversen3", "ARMProcFamily", "NeoverseN3
                                       FeaturePostRAScheduler,
                                       FeatureALULSLFast,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeatureEnableSelectOptimize,
                                       FeaturePredictableSelectIsExpensive]>;
 
@@ -563,6 +575,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
                                       "Neoverse V1 ARM processors", [
                                       FeatureFuseAES,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeatureAddrLSLSlow14,
                                       FeatureALULSLFast,
                                       FeaturePostRAScheduler,
@@ -575,6 +588,7 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
                                       FeatureFuseAES,
                                       FeatureCmpBccFusion,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeatureALULSLFast,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
@@ -588,6 +602,7 @@ def TuneNeoverseV3 : SubtargetFeature<"neoversev3", "ARMProcFamily", "NeoverseV3
                                       FeatureFuseAES,
                                       FeatureALULSLFast,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
                                       FeatureAvoidLDAPUR,
@@ -598,6 +613,7 @@ def TuneNeoverseV3AE : SubtargetFeature<"neoversev3AE", "ARMProcFamily", "Neover
                                       FeatureFuseAES,
                                       FeatureALULSLFast,
                                       FeatureFuseAdrpAdd,
+                                      FeatureFuseCCSelect,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
                                       FeatureAvoidLDAPUR,
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-csel.ll b/llvm/test/CodeGen/AArch64/misched-fusion-csel.ll
index ac0adb7f85d0d..8fa60ee93663d 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-csel.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-csel.ll
@@ -1,9 +1,42 @@
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel | FileCheck %s
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3  | FileCheck %s
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4  | FileCheck %s
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5  | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a710 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a715 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a720 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a725 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x4 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x925 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n2 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v2 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
+; REQUIRES: asserts
 
-target triple = "aarch64-unknown"
+; Check that the scheduling DAG has a cluster edge between the SUBS and the CSEL.
+; CHECK-LABEL: test_sub_cselw:%bb.0
+; CHECK: SU(2):   %3:gpr32common = ADDWri %1:gpr32common, 7, 0
+; CHECK: SU(3):   dead $wzr = SUBSWri %0:gpr32common, 13, 0, implicit-def $nzcv
+; CHECK:   Successors:
+; CHECK:     SU(4): Ord  Latency=0 Cluster
+; CHECK: SU(4):   %5:gpr32 = CSELWr %0:gpr32common, %3:gpr32common, 0, implicit killed $nzcv
+; CHECK:   Predecessors:
+; CHECK:     SU(3): Ord  Latency=0 Cluster
+; CHECK: SU(5):   $w0 = COPY %5:gpr32
+
+
+; CHECK-LABEL: test_sub_cselx:%bb.0
+; CHECK: SU(2):   %3:gpr64common = ADDXri %1:gpr64common, 7, 0
+; CHECK: SU(3):   dead $xzr = SUBSXri %0:gpr64common, 13, 0, implicit-def $nzcv
+; CHECK:   Successors:
+; CHECK:     SU(4): Ord  Latency=0 Cluster
+; CHECK: SU(4):   %5:gpr64 = CSELXr %0:gpr64common, %3:gpr64common, 0, implicit killed $nzcv
+; CHECK:   Predecessors:
+; CHECK:     SU(3): Ord  Latency=0 Cluster
+; CHECK: SU(5):   $x0 = COPY %5:gpr64
 
 define i32 @test_sub_cselw(i32 %a0, i32 %a1, i32 %a2) {
 entry:



More information about the llvm-commits mailing list