[llvm] [AArch64] MI Scheduler STP combine (PR #80188)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 11:53:06 PST 2024


https://github.com/sjoerdmeijer created https://github.com/llvm/llvm-project/pull/80188

Add opcodes for different store instructions to the target hook that can enable more STP pairs. This is split off from the patch that does the same for some load instructions (#79003).

Patch co-authored by Cameron McInally.

From b3e6e764684f3ce25b51fcc095aab9d86f9119e9 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 1 Feb 2024 01:17:24 +0530
Subject: [PATCH] [AArch64] MI Scheduler STP combine

Add opcodes for different store instructions to the target hook that
can enable more STP pairs. This is split off from the patch that does
the same for some load instructions (#79003).

Patch co-authored by Cameron McInally.
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 15 ++++
 .../test/CodeGen/AArch64/arm64-ldp-cluster.ll | 72 ++++++++++++++++++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8e50c16ba0887..6c2feb7af9ca1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4206,6 +4206,21 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   switch (FirstOpc) {
   default:
     return false;
+  case AArch64::STRSui:
+  case AArch64::STURSi:
+    return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
+  case AArch64::STRDui:
+  case AArch64::STURDi:
+    return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
+  case AArch64::STRQui:
+  case AArch64::STURQi:
+    return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
+  case AArch64::STRWui:
+  case AArch64::STURWi:
+    return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
+  case AArch64::STRXui:
+  case AArch64::STURXi:
+    return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
   case AArch64::LDRSui:
   case AArch64::LDURSi:
     return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
index d1bce2fbfa9f2..8c7b31fd34c48 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefixes=CHECK,CHECK-A57
 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m3 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
 
 ; Test ldr clustering.
@@ -227,3 +227,73 @@ entry:
   store i64 %r53, ptr %wb
   ret void
 }
+
+; CHECK: ********** MI Scheduling **********
+; CHECK: STURWi_STRWui:%bb.0 entry
+; CHECK: Cluster ld/st SU(3) - SU(4)
+; CHECK: SU(3):   STURWi %{{[0-9]+}}:gpr32
+; CHECK: SU(4):   STRWui %{{[0-9]+}}:gpr32
+;
+define void @STURWi_STRWui(ptr nocapture readonly %arg, i32 %b, i32 %c) {
+entry:
+  %r51 = getelementptr i8, ptr %arg, i64 -4
+  store i32 %b, ptr %r51
+  store i32 %c, ptr %arg
+  ret void
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK: STURXi_STRXui:%bb.0 entry
+; CHECK: Cluster ld/st SU(3) - SU(4)
+; CHECK: SU(3):   STURXi %{{[0-9]+}}:gpr64
+; CHECK: SU(4):   STRXui %{{[0-9]+}}:gpr64
+;
+define void @STURXi_STRXui(ptr nocapture readonly %arg, i64 %b, i64 %c) {
+entry:
+  %r51 = getelementptr i8, ptr %arg, i64 -8
+  store i64 %b, ptr %r51
+  store i64 %c, ptr %arg
+  ret void
+}
+
+; CHECK-A57: ********** MI Scheduling **********
+; CHECK-A57: STURSi_STRSui:%bb.0 entry
+; CHECK-A57: Cluster ld/st SU(3) - SU(4)
+; CHECK-A57: SU(3):   STURSi %{{[0-9]+}}:fpr32
+; CHECK-A57: SU(4):   STRSui %{{[0-9]+}}:fpr32
+;
+define void @STURSi_STRSui(ptr nocapture readonly %arg, float %b, float %c) {
+entry:
+  %r51 = getelementptr i8, ptr %arg, i64 -4
+  store float %b, ptr %r51
+  store float %c, ptr %arg
+  ret void
+}
+
+; CHECK-A57: ********** MI Scheduling **********
+; CHECK-A57: STURDi_STRDui:%bb.0 entry
+; CHECK-A57: Cluster ld/st SU(3) - SU(4)
+; CHECK-A57: SU(3):   STURDi %{{[0-9]+}}:fpr64
+; CHECK-A57: SU(4):   STRDui %{{[0-9]+}}:fpr64
+;
+define void @STURDi_STRDui(ptr nocapture readonly %arg, <2 x float> %b, <2 x float> %c) {
+entry:
+  %r51 = getelementptr i8, ptr %arg, i64 -8
+  store <2 x float> %b, ptr %r51
+  store <2 x float> %c, ptr %arg
+  ret void
+}
+
+; CHECK-A57: ********** MI Scheduling **********
+; CHECK-A57: STURQi_STRQui:%bb.0 entry
+; CHECK-A57: Cluster ld/st SU(3) - SU(4)
+; CHECK-A57: SU(3):   STURQi %{{[0-9]+}}:fpr128
+; CHECK-A57: SU(4):   STRQui %{{[0-9]+}}:fpr128
+;
+define void @STURQi_STRQui(ptr nocapture readonly %arg, <2 x double> %b, <2 x double> %c) {
+entry:
+  %r51 = getelementptr i8, ptr %arg, i64 -16
+  store <2 x double> %b, ptr %r51
+  store <2 x double> %c, ptr %arg
+  ret void
+}



More information about the llvm-commits mailing list