[llvm] [AArch64] Enable CmpBcc fusion for Neoverse-v2 (PR #90608)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 06:36:45 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Elvina Yakubova (ElvinaYakubova)
<details>
<summary>Changes</summary>
This adds compare-and-branch instruction fusion for Neoverse V2. According to the Software Optimization Guide:
Specific Aarch64 instruction pairs that can be fused are as follows:
CMP/CMN (immediate) + B.cond
CMP/CMN (register) + B.cond
Performance for SPEC2017 is neutral, but another benchmark improves significantly.
Results for SPEC2017 on a Neoverse V2:
500.perlbench 0%
502.gcc_r 0%
505.mcf_r -0.15%
523.xalancbmk_r -0.43%
525.x264_r 0%
531.deepsjeng_r 0%
541.leela_r -0.16%
557.xz_r -0.47%
---
Full diff: https://github.com/llvm/llvm-project/pull/90608.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+1)
- (added) llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll (+35)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 8772e51bf0ab42..af0bd664029f49 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -479,6 +479,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
"Neoverse V2 ARM processors", [
FeatureFuseAES,
+ FeatureCmpBccFusion,
FeatureFuseAdrpAdd,
FeatureALULSLFast,
FeaturePostRAScheduler,
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
new file mode 100644
index 00000000000000..1034e072922a09
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
@@ -0,0 +1,35 @@
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mattr=cmp-bcc-fusion | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78ae | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78c | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a710 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a715 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720ae | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x2 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=neoverse-v2 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s
+; Check that the store is emitted first so the compare (subs) is immediately followed by the conditional branch.
+
+define void @test_cmp_bcc_fusion(i32 %x, i32 %y, i32* %arr) {
+entry:
+ %cmp = icmp eq i32 %x, %y
+ store i32 %x, i32* %arr, align 4
+ br i1 %cmp, label %if_true, label %if_false
+
+if_true:
+ ret void
+
+if_false:
+ ret void
+}
+
+; CHECK-LABEL: test_cmp_bcc_fusion:
+; CHECK: str {{w[0-9]}}, [{{x[0-9]}}]
+; CHECK-NEXT: subs {{w[0-9]}}, {{w[0-9]}}, {{w[0-9]}}
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: b .LBB0_1
``````````
</details>
https://github.com/llvm/llvm-project/pull/90608
More information about the llvm-commits mailing list