[llvm] [AArch64][GISel] Fold G_SHL used by multiple G_PTR_ADD into load/store addressing mode (PR #96603)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 25 01:10:48 PDT 2024


https://github.com/Him188 created https://github.com/llvm/llvm-project/pull/96603

This patch fixes a 15% GISel regression in the TSVC kernel s482. It also reduces the regression in s291 from 20% to 10%.

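The pattern in question looks roughly like the following (a minimal C++ sketch of the assumed shape, not the actual s482 kernel): the same scaled index is used to access two arrays, so a single G_SHL computing i << 3 ends up feeding two G_PTR_ADDs, one for the load and one for the store. isWorthFoldingIntoExtendedReg previously required every user of the G_SHL to itself be a load or store, so the shared shift was not folded into either addressing mode.

  // Minimal sketch (assumed shape, not the actual TSVC s482 kernel): the
  // single shift i << 3 (scaling by sizeof(long)) is shared by the address
  // computations of both the load of src[i] and the store to dst[i], i.e.
  // one G_SHL used by two G_PTR_ADDs.
  void copy_elem(long *dst, long *src, long i) { dst[i] = src[i]; }

With this patch the fold is still performed in that situation: a G_PTR_ADD user of the G_SHL is accepted as long as it is in turn only used by memory operations, so both accesses can select the register-offset addressing mode instead of materializing the shifted index first.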

From 8010989545499509ebe346afaa16a54f7ae05334 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 24 Jun 2024 11:43:56 +0100
Subject: [PATCH] [AArch64][GISel] Fold G_SHL used by multiple G_PTR_ADD into
 load/store addressing mode

This patch fixes a 15% GISel regression in the TSVC kernel s482.
It also reduces the regression in s291 from 20% to 10%.
---
 .../GISel/AArch64InstructionSelector.cpp      | 17 +++++++++--
 .../GlobalISel/addressing-modes-multiple.mir  | 29 +++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/addressing-modes-multiple.mir

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 0357a7206c478..efbef473a9e5a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -6717,6 +6717,20 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
   return select12BitValueWithLeftShift(Immed);
 }
 
+/// Returns true if the def of MI is only used by memory operations.
+/// If the def is G_SHL, we also check indirect usages through G_PTR_ADD.
+static bool onlyUsedInMemoryOps(MachineInstr &MI,
+                                const MachineRegisterInfo &MRI) {
+  const Register DefReg = MI.getOperand(0).getReg();
+  return all_of(MRI.use_nodbg_instructions(DefReg), [&](MachineInstr &Use) {
+    if (MI.getOpcode() == AArch64::G_SHL &&
+        Use.getOpcode() == AArch64::G_PTR_ADD &&
+        onlyUsedInMemoryOps(Use, MRI))
+      return true;
+    return Use.mayLoadOrStore();
+  });
+}
+
 /// Return true if it is worth folding MI into an extended register. That is,
 /// if it's safe to pull it into the addressing mode of a load or store as a
 /// shift.
@@ -6734,8 +6748,7 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
   // We have a fastpath, so folding a shift in and potentially computing it
   // many times may be beneficial. Check if this is only used in memory ops.
   // If it is, then we should fold.
-  return all_of(MRI.use_nodbg_instructions(DefReg),
-                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
+  return onlyUsedInMemoryOps(MI, MRI);
 }
 
 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/addressing-modes-multiple.mir b/llvm/test/CodeGen/AArch64/GlobalISel/addressing-modes-multiple.mir
new file mode 100644
index 0000000000000..cfcdcf84be9dd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/addressing-modes-multiple.mir
@@ -0,0 +1,29 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64-linux-gnu -O3 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s
+
+---
+name:            shl_multiple_ptr_add
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: shl_multiple_ptr_add
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64))
+    ; CHECK-NEXT: STRXroX [[LDRXroX]], [[COPY1]], [[COPY]], 0, 1 :: (store (s64))
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 3
+    %2:gpr(s64) = G_SHL %0, %1(s64) ; %2 is used by multiple G_PTR_ADDs
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_PTR_ADD %3, %2
+    %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64))
+    %ptr:gpr(p0) = G_PTR_ADD %3, %2
+    G_STORE %5, %ptr :: (store (s64))
+...
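
For the test above, the expected selection is the register-offset forms LDRXroX/STRXroX with the shift folded in, i.e. roughly "ldr x8, [x1, x0, lsl #3]" followed by "str x8, [x1, x0, lsl #3]". Without the fold, the shifted index would first be materialized through separate shift/add instructions and both accesses would go through that computed address.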