[llvm] 44b686e - [AArch64] Unfold adds when eliminating frame index with scalable offset (#158597)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 07:31:03 PDT 2025


Author: Hongyu Chen
Date: 2025-09-16T14:30:57Z
New Revision: 44b686e52ec462c173d0b114172db2a783215116

URL: https://github.com/llvm/llvm-project/commit/44b686e52ec462c173d0b114172db2a783215116
DIFF: https://github.com/llvm/llvm-project/commit/44b686e52ec462c173d0b114172db2a783215116.diff

LOG: [AArch64] Unfold adds when eliminating frame index with scalable offset (#158597)

Fixes https://github.com/llvm/llvm-project/issues/157252.
The peephole optimization tends to fold:
```
add %gpr1, %stack, 0
subs %gpr2, %gpr1, 0
```
to
```
adds %gpr2, %stack, 0
```

This patch undoes that fold in `rewriteAArch64FrameIndex` so that an
`adds` referencing the stack object can be processed correctly.

Added: 
    llvm/test/CodeGen/AArch64/pr157252.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9a7512b77ecdb..5a51c812732e6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6273,6 +6273,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
       Offset, Bytes, NumPredicateVectors, NumDataVectors);
 
+  // Insert ADDSXri for scalable offset at the end.
+  bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
+  if (NeedsFinalDefNZCV)
+    SetNZCV = false;
+
   // First emit non-scalable frame offsets, or a simple 'mov'.
   if (Bytes || (!Offset && SrcReg != DestReg)) {
     assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
@@ -6292,8 +6297,6 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
     FrameReg = DestReg;
   }
 
-  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
-         "SetNZCV not supported with SVE vectors");
   assert(!(NeedsWinCFI && NumPredicateVectors) &&
          "WinCFI can't allocate fractions of an SVE data vector");
 
@@ -6313,6 +6316,12 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                        Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                        FrameReg);
   }
+
+  if (NeedsFinalDefNZCV)
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
+        .addReg(DestReg)
+        .addImm(0)
+        .addImm(0);
 }
 
 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(

diff  --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir
new file mode 100644
index 0000000000000..319e54f0fa7e9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.mir
@@ -0,0 +1,25 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -frame-pointer=none -o - %s | FileCheck %s
+---
+name: test_addsxri_scalable_offset
+stack:
+  - { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
+  - { id: 1, type: default, size: 16, alignment: 16, stack-id: scalable-vector }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_addsxri_scalable_offset
+    ; CHECK: liveins: $fp
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
+    ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+    ; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0
+    ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
+    ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
+    ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+    ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x8
+    $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
+    RET_ReallyLR implicit $x8
+...


        


More information about the llvm-commits mailing list