[llvm] 2f083b3 - [AArch64] Fix resource length computation for STP. (#81749)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 16 08:10:13 PST 2024


Author: Florian Hahn
Date: 2024-02-16T16:10:10Z
New Revision: 2f083b364f43fb12b2fdf23935e1f0b6958d0882

URL: https://github.com/llvm/llvm-project/commit/2f083b364f43fb12b2fdf23935e1f0b6958d0882
DIFF: https://github.com/llvm/llvm-project/commit/2f083b364f43fb12b2fdf23935e1f0b6958d0882.diff

LOG: [AArch64] Fix resource length computation for STP. (#81749)

On some uArchs, `STP [s|d], [s|d]` first combines the 2 input registers
into a single register using a vector execution unit. If I understand
correctly, AArch64StorePairSuppress tries to prevent forming STPs in case
the critical resources are the vector units, in order to prevent adding
more pressure on those units.

The implementation, however, simply computes the new critical resource
length by adding the resources for another STP. If load/store units are
the critical resource, this means we increase that length by one, and
incorrectly prevent forming the STP.

This patch adjusts the resource computation by also removing 2 STRs, as
introducing an STP will remove 2 single stores. This should more
accurately reflect the resource usage after introducing an STP, and does
not prevent forming STPs if load/store units are the critical resource;
in those cases, an STP can actually help to reduce resource usage.

PR: https://github.com/llvm/llvm-project/pull/81749

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
    llvm/test/CodeGen/AArch64/arm64-stur.ll
    llvm/test/CodeGen/AArch64/merge-store.ll
    llvm/test/CodeGen/AArch64/storepairsuppress.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 7324be48a415ad..773c309a0943e3 100644
--- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -81,15 +81,23 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
   MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
   unsigned ResLength = BBTrace.getResourceLength();
 
-  // Get the machine model's scheduling class for STPQi.
+  // Get the machine model's scheduling class for STPDi and STRDui.
   // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
   unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
-  const MCSchedClassDesc *SCDesc =
+  const MCSchedClassDesc *PairSCDesc =
       SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
 
-  // If a subtarget does not define resources for STPQi, bail here.
-  if (SCDesc->isValid() && !SCDesc->isVariant()) {
-    unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
+  unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass();
+  const MCSchedClassDesc *SingleSCDesc =
+      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2);
+
+  // If a subtarget does not define resources for STPDi, bail here.
+  if (PairSCDesc->isValid() && !PairSCDesc->isVariant() &&
+      SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) {
+    // Compute the new critical resource length after replacing 2 separate
+    // STRDui with one STPDi.
+    unsigned ResLenWithSTP = BBTrace.getResourceLength(
+        std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc});
     if (ResLenWithSTP > ResLength) {
       LLVM_DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
                         << " resources " << ResLength << " -> " << ResLenWithSTP

diff  --git a/llvm/test/CodeGen/AArch64/arm64-stur.ll b/llvm/test/CodeGen/AArch64/arm64-stur.ll
index 2a74abb10226da..7d9de9e28ff5c0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stur.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stur.ll
@@ -65,9 +65,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
 ; CHECK-LABEL: unaligned:
 ; CHECK-NOT: str q0
-; CHECK: str     d[[REG:[0-9]+]], [x0]
-; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
-; CHECK: str     d[[REG2]], [x0, #8]
+; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG:[0-9]+]], v[[REG]], #8
+; CHECK: stp     d[[REG]], d[[REG2]], [x0]
 define void @unaligned(ptr %p, <4 x i32> %v) nounwind {
   store <4 x i32> %v, ptr %p, align 4
   ret void

diff  --git a/llvm/test/CodeGen/AArch64/merge-store.ll b/llvm/test/CodeGen/AArch64/merge-store.ll
index b93d0c3bc96086..6653984562ae6d 100644
--- a/llvm/test/CodeGen/AArch64/merge-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store.ll
@@ -45,8 +45,7 @@ define void @merge_vec_extract_stores(<4 x float> %v1, ptr %ptr) {
 ; SPLITTING-LABEL: merge_vec_extract_stores:
 ; SPLITTING:       // %bb.0:
 ; SPLITTING-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; SPLITTING-NEXT:    str d0, [x0, #24]
-; SPLITTING-NEXT:    str d1, [x0, #32]
+; SPLITTING-NEXT:    stp d0, d1, [x0, #24]
 ; SPLITTING-NEXT:    ret
 ;
 ; MISALIGNED-LABEL: merge_vec_extract_stores:

diff  --git a/llvm/test/CodeGen/AArch64/storepairsuppress.ll b/llvm/test/CodeGen/AArch64/storepairsuppress.ll
index 9892c09581ea2b..0571bbc278a6f4 100644
--- a/llvm/test/CodeGen/AArch64/storepairsuppress.ll
+++ b/llvm/test/CodeGen/AArch64/storepairsuppress.ll
@@ -44,12 +44,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s0, s5, s0, s1
 ; SUPPRESS-NEXT:    fadd s1, s4, s2
 ; SUPPRESS-NEXT:    fadd s5, s0, s3
-; SUPPRESS-NEXT:    str s1, [x8]
-; SUPPRESS-NEXT:    str s5, [x8, #4]
+; SUPPRESS-NEXT:    stp s1, s5, [x8]
 ; SUPPRESS-NEXT:    fsub s2, s2, s4
 ; SUPPRESS-NEXT:    fsub s0, s3, s0
-; SUPPRESS-NEXT:    str s2, [x8, #8]
-; SUPPRESS-NEXT:    str s0, [x8, #12]
+; SUPPRESS-NEXT:    stp s2, s0, [x8, #8]
 ; SUPPRESS-NEXT:    ldr x9, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s3, s4, [x9]
 ; SUPPRESS-NEXT:    ldp s6, s7, [x8, #16]
@@ -60,12 +58,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s3, s17, s3, s4
 ; SUPPRESS-NEXT:    fadd s4, s16, s6
 ; SUPPRESS-NEXT:    fadd s17, s3, s7
-; SUPPRESS-NEXT:    str s4, [x8, #16]
-; SUPPRESS-NEXT:    str s17, [x8, #20]
+; SUPPRESS-NEXT:    stp s4, s17, [x8, #16]
 ; SUPPRESS-NEXT:    fsub s6, s6, s16
 ; SUPPRESS-NEXT:    fsub s3, s7, s3
-; SUPPRESS-NEXT:    str s6, [x8, #24]
-; SUPPRESS-NEXT:    str s3, [x8, #28]
+; SUPPRESS-NEXT:    stp s6, s3, [x8, #24]
 ; SUPPRESS-NEXT:    ldr x9, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s7, s16, [x9]
 ; SUPPRESS-NEXT:    fmul s18, s16, s17
@@ -74,12 +70,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s4, s16, s4, s17
 ; SUPPRESS-NEXT:    fadd s16, s7, s1
 ; SUPPRESS-NEXT:    fadd s17, s4, s5
-; SUPPRESS-NEXT:    str s16, [x8]
-; SUPPRESS-NEXT:    str s17, [x8, #4]
+; SUPPRESS-NEXT:    stp s16, s17, [x8]
 ; SUPPRESS-NEXT:    fsub s1, s1, s7
 ; SUPPRESS-NEXT:    fsub s4, s5, s4
-; SUPPRESS-NEXT:    str s1, [x8, #16]
-; SUPPRESS-NEXT:    str s4, [x8, #20]
+; SUPPRESS-NEXT:    stp s1, s4, [x8, #16]
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    lsl x9, x3, #4
 ; SUPPRESS-NEXT:    add x10, x10, x9
@@ -90,12 +84,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s3, s4, s6, s3
 ; SUPPRESS-NEXT:    fadd s4, s1, s2
 ; SUPPRESS-NEXT:    fadd s5, s3, s0
-; SUPPRESS-NEXT:    str s4, [x8, #8]
-; SUPPRESS-NEXT:    str s5, [x8, #12]
+; SUPPRESS-NEXT:    stp s4, s5, [x8, #8]
 ; SUPPRESS-NEXT:    fsub s1, s2, s1
 ; SUPPRESS-NEXT:    fsub s0, s0, s3
-; SUPPRESS-NEXT:    str s1, [x8, #24]
-; SUPPRESS-NEXT:    str s0, [x8, #28]
+; SUPPRESS-NEXT:    stp s1, s0, [x8, #24]
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s0, s1, [x10]
 ; SUPPRESS-NEXT:    ldp s2, s3, [x8, #32]
@@ -106,12 +98,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s0, s5, s0, s1
 ; SUPPRESS-NEXT:    fadd s1, s4, s2
 ; SUPPRESS-NEXT:    fadd s5, s0, s3
-; SUPPRESS-NEXT:    str s1, [x8, #32]
-; SUPPRESS-NEXT:    str s5, [x8, #36]
+; SUPPRESS-NEXT:    stp s1, s5, [x8, #32]
 ; SUPPRESS-NEXT:    fsub s2, s2, s4
 ; SUPPRESS-NEXT:    fsub s3, s3, s0
-; SUPPRESS-NEXT:    str s2, [x8, #40]
-; SUPPRESS-NEXT:    str s3, [x8, #44]
+; SUPPRESS-NEXT:    stp s2, s3, [x8, #40]
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s0, s4, [x10]
 ; SUPPRESS-NEXT:    ldp s6, s7, [x8, #48]
@@ -122,12 +112,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s0, s17, s0, s4
 ; SUPPRESS-NEXT:    fadd s4, s16, s6
 ; SUPPRESS-NEXT:    fadd s17, s0, s7
-; SUPPRESS-NEXT:    str s4, [x8, #48]
-; SUPPRESS-NEXT:    str s17, [x8, #52]
+; SUPPRESS-NEXT:    stp s4, s17, [x8, #48]
 ; SUPPRESS-NEXT:    fsub s6, s6, s16
 ; SUPPRESS-NEXT:    fsub s0, s7, s0
-; SUPPRESS-NEXT:    str s6, [x8, #56]
-; SUPPRESS-NEXT:    str s0, [x8, #60]
+; SUPPRESS-NEXT:    stp s6, s0, [x8, #56]
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s7, s16, [x10]
 ; SUPPRESS-NEXT:    fmul s18, s16, s17
@@ -136,12 +124,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s4, s16, s4, s17
 ; SUPPRESS-NEXT:    fadd s16, s7, s1
 ; SUPPRESS-NEXT:    fadd s17, s4, s5
-; SUPPRESS-NEXT:    str s16, [x8, #32]
-; SUPPRESS-NEXT:    str s17, [x8, #36]
+; SUPPRESS-NEXT:    stp s16, s17, [x8, #32]
 ; SUPPRESS-NEXT:    fsub s7, s1, s7
 ; SUPPRESS-NEXT:    fsub s4, s5, s4
-; SUPPRESS-NEXT:    str s7, [x8, #48]
-; SUPPRESS-NEXT:    str s4, [x8, #52]
+; SUPPRESS-NEXT:    stp s7, s4, [x8, #48]
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    add x9, x10, x9
 ; SUPPRESS-NEXT:    ldp s1, s5, [x9]
@@ -151,12 +137,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s5, s5, s6, s0
 ; SUPPRESS-NEXT:    fadd s6, s1, s2
 ; SUPPRESS-NEXT:    fadd s18, s5, s3
-; SUPPRESS-NEXT:    str s6, [x8, #40]
-; SUPPRESS-NEXT:    str s18, [x8, #44]
+; SUPPRESS-NEXT:    stp s6, s18, [x8, #40]
 ; SUPPRESS-NEXT:    fsub s0, s2, s1
 ; SUPPRESS-NEXT:    fsub s1, s3, s5
-; SUPPRESS-NEXT:    str s0, [x8, #56]
-; SUPPRESS-NEXT:    str s1, [x8, #60]
+; SUPPRESS-NEXT:    stp s0, s1, [x8, #56]
 ; SUPPRESS-NEXT:    ldr x9, [x0, #8]
 ; SUPPRESS-NEXT:    ldp s2, s3, [x9]
 ; SUPPRESS-NEXT:    ldp s5, s19, [x8]
@@ -166,12 +150,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s2, s17, s2, s3
 ; SUPPRESS-NEXT:    fadd s3, s16, s5
 ; SUPPRESS-NEXT:    fadd s17, s2, s19
-; SUPPRESS-NEXT:    str s3, [x8]
-; SUPPRESS-NEXT:    str s17, [x8, #4]
+; SUPPRESS-NEXT:    stp s3, s17, [x8]
 ; SUPPRESS-NEXT:    fsub s3, s5, s16
 ; SUPPRESS-NEXT:    fsub s2, s19, s2
-; SUPPRESS-NEXT:    str s3, [x8, #32]
-; SUPPRESS-NEXT:    str s2, [x8, #36]
+; SUPPRESS-NEXT:    stp s3, s2, [x8, #32]
 ; SUPPRESS-NEXT:    ldr x9, [x0, #8]
 ; SUPPRESS-NEXT:    add x9, x9, w3, sxtw #3
 ; SUPPRESS-NEXT:    ldp s2, s3, [x9]
@@ -182,12 +164,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s2, s18, s2, s3
 ; SUPPRESS-NEXT:    fadd s3, s6, s5
 ; SUPPRESS-NEXT:    fadd s17, s2, s16
-; SUPPRESS-NEXT:    str s3, [x8, #8]
-; SUPPRESS-NEXT:    str s17, [x8, #12]
+; SUPPRESS-NEXT:    stp s3, s17, [x8, #8]
 ; SUPPRESS-NEXT:    fsub s3, s5, s6
 ; SUPPRESS-NEXT:    fsub s2, s16, s2
-; SUPPRESS-NEXT:    str s3, [x8, #40]
-; SUPPRESS-NEXT:    str s2, [x8, #44]
+; SUPPRESS-NEXT:    stp s3, s2, [x8, #40]
 ; SUPPRESS-NEXT:    lsl x9, x3, #33
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    add x9, x10, x9, asr #29
@@ -199,12 +179,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s2, s4, s2, s3
 ; SUPPRESS-NEXT:    fadd s3, s7, s5
 ; SUPPRESS-NEXT:    fadd s4, s2, s6
-; SUPPRESS-NEXT:    str s3, [x8, #16]
-; SUPPRESS-NEXT:    str s4, [x8, #20]
+; SUPPRESS-NEXT:    stp s3, s4, [x8, #16]
 ; SUPPRESS-NEXT:    fsub s3, s5, s7
 ; SUPPRESS-NEXT:    fsub s2, s6, s2
-; SUPPRESS-NEXT:    str s3, [x8, #48]
-; SUPPRESS-NEXT:    str s2, [x8, #52]
+; SUPPRESS-NEXT:    stp s3, s2, [x8, #48]
 ; SUPPRESS-NEXT:    add w9, w3, w3, lsl #1
 ; SUPPRESS-NEXT:    ldr x10, [x0, #8]
 ; SUPPRESS-NEXT:    add x9, x10, w9, sxtw #3
@@ -216,12 +194,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
 ; SUPPRESS-NEXT:    fmadd s1, s1, s2, s3
 ; SUPPRESS-NEXT:    fadd s2, s0, s4
 ; SUPPRESS-NEXT:    fadd s3, s1, s5
-; SUPPRESS-NEXT:    str s2, [x8, #24]
-; SUPPRESS-NEXT:    str s3, [x8, #28]
+; SUPPRESS-NEXT:    stp s2, s3, [x8, #24]
 ; SUPPRESS-NEXT:    fsub s0, s4, s0
 ; SUPPRESS-NEXT:    fsub s1, s5, s1
-; SUPPRESS-NEXT:    str s0, [x8, #56]
-; SUPPRESS-NEXT:    str s1, [x8, #60]
+; SUPPRESS-NEXT:    stp s0, s1, [x8, #56]
 ; SUPPRESS-NEXT:    ret
 ;
 ; NOSUPPRESS-LABEL: load_store_units_critical:


        


More information about the llvm-commits mailing list