[llvm] [AArch64] Unfold adds when eliminating frame index with scalable offset (PR #158597)

Hongyu Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 06:06:37 PDT 2025


https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/158597

>From 27c6b10a95f3056e24ae5381f21233447c1816f3 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 15 Sep 2025 18:16:50 +0800
Subject: [PATCH 1/7] [AArch64] Avoid applying S-form to frame index in peephole

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp |  5 +
 llvm/test/CodeGen/AArch64/pr157252.ll        | 96 ++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e56fe90259d5c..2c09710831808 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1920,6 +1920,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
           CmpInstr.getOperand(2).getImm() == 0) &&
          "Caller guarantees that CmpInstr compares with constant 0");
 
+  // NZCV is not supported if the stack offset is scalable.
+  auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
+  if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
+    return false;
+
   std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
   if (!NZVCUsed || NZVCUsed->C)
     return false;
diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll
new file mode 100644
index 0000000000000..c3b296a795157
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @i(ptr %ad, ptr %0) #0 {
+; CHECK-LABEL: i:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d11, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    stp x28, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w28, -16
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:  // %bb.1: // %asm.fallthrough
+; CHECK-NEXT:  .LBB0_2: // Inline asm indirect target
+; CHECK-NEXT:    // %ah.preheader.preheader
+; CHECK-NEXT:    // Label of block must be emitted
+; CHECK-NEXT:    mov x8, #-35417 // =0xffffffffffff75a7
+; CHECK-NEXT:    mov x9, #35417 // =0x8a59
+; CHECK-NEXT:    mov w19, #1 // =0x1
+; CHECK-NEXT:    movk x8, #29436, lsl #16
+; CHECK-NEXT:    movk x9, #36099, lsl #16
+; CHECK-NEXT:    stp x1, x0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    movk x8, #64591, lsl #32
+; CHECK-NEXT:    movk x9, #944, lsl #32
+; CHECK-NEXT:    index z0.d, x9, x8
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:  .LBB0_3: // Inline asm indirect target
+; CHECK-NEXT:    // %ah
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    // Label of block must be emitted
+; CHECK-NEXT:    sub x9, x29, #16
+; CHECK-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    str d0, [x8]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:  // %bb.4: // %asm.fallthrough2
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    add x8, sp, #28
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldp x10, x8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    strb w19, [x10]
+; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    b .LBB0_3
+entry:
+  %aj = alloca i32, align 4
+  callbr void asm sideeffect "", "!i,!i"()
+          to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
+
+ah.preheader.preheader:                           ; preds = %entry, %entry
+  %conv = xor i8 0, 1
+  br label %ah
+
+asm.fallthrough:                                  ; preds = %entry
+  unreachable
+
+ah:                                               ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
+  %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
+  %vecext = extractelement <8 x i64> %af.2, i64 0
+  store i64 %vecext, ptr %ad, align 8
+  call void asm sideeffect "", "~{v11}"()
+  callbr void asm sideeffect "", "!i"()
+          to label %asm.fallthrough2 [label %ah]
+
+asm.fallthrough2:                                 ; preds = %ah
+  %call = call i32 @g()
+  store i8 %conv, ptr %0, align 1
+  %cmp = icmp ne ptr %aj, null
+  %conv3 = zext i1 %cmp to i32
+  store i32 %conv3, ptr %ad, align 4
+  callbr void asm sideeffect "", "!i"()
+          to label %ah [label %ah]
+}
+
+declare i32 @g(...)
+
+attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }

>From a30b328fbbe1c055c97bfcb40aed7d1fb88417ba Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:48:50 +0800
Subject: [PATCH 2/7] Unfold adds

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp |  57 +++++--
 llvm/test/CodeGen/AArch64/pr157252.ll        |  96 ------------
 llvm/test/CodeGen/AArch64/pr157252.mir       | 154 +++++++++++++++++++
 3 files changed, 197 insertions(+), 110 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll
 create mode 100644 llvm/test/CodeGen/AArch64/pr157252.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 2c09710831808..17a9999495602 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1920,11 +1920,6 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
           CmpInstr.getOperand(2).getImm() == 0) &&
          "Caller guarantees that CmpInstr compares with constant 0");
 
-  // NZCV is not supported if the stack offset is scalable.
-  auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
-  if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
-    return false;
-
   std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
   if (!NZVCUsed || NZVCUsed->C)
     return false;
@@ -6569,18 +6564,52 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
 }
 
+// Unfold ADDSXri:
+//    adds %dest, %stack, c
+//  -->
+//    add %dest, %stack, 0
+//    adds %dest, %dest, c
+static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
+                                  const AArch64InstrInfo *TII) {
+  auto *MBB = MI.getParent();
+  Register DestReg = MI.getOperand(0).getReg();
+
+  auto *Unfolded =
+      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg)
+          .addReg(FrameReg)
+          .addImm(0)
+          .addImm(0)
+          .getInstr();
+
+  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg)
+      .addReg(DestReg)
+      .addImm(MI.getOperand(2).getImm())
+      .addImm(MI.getOperand(3).getImm());
+
+  MI.eraseFromParent();
+  Unfolded->getParent()->dump();
+  return Unfolded;
+}
+
 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                     unsigned FrameReg, StackOffset &Offset,
                                     const AArch64InstrInfo *TII) {
   unsigned Opcode = MI.getOpcode();
   unsigned ImmIdx = FrameRegIdx + 1;
 
+  MachineInstr *NewMI = &MI;
+  if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
+    NewMI = unfoldAddXri(MI, FrameReg, TII);
+    Opcode = AArch64::ADDXri;
+  }
+
   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
-    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
-    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
-                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
-                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
-    MI.eraseFromParent();
+    Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
+    emitFrameOffset(*NewMI->getParent(), *NewMI,
+                    NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(),
+                    FrameReg, Offset, TII, MachineInstr::NoFlags,
+                    (Opcode == AArch64::ADDSXri));
+    NewMI->eraseFromParent();
     Offset = StackOffset();
     return true;
   }
@@ -6588,16 +6617,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   int64_t NewOffset;
   unsigned UnscaledOp;
   bool UseUnscaledOp;
-  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
+  int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp,
                                          &UnscaledOp, &NewOffset);
   if (Status & AArch64FrameOffsetCanUpdate) {
     if (Status & AArch64FrameOffsetIsLegal)
       // Replace the FrameIndex with FrameReg.
-      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
     if (UseUnscaledOp)
-      MI.setDesc(TII->get(UnscaledOp));
+      NewMI->setDesc(TII->get(UnscaledOp));
 
-    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
+    NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset);
     return !Offset;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll
deleted file mode 100644
index c3b296a795157..0000000000000
--- a/llvm/test/CodeGen/AArch64/pr157252.ll
+++ /dev/null
@@ -1,96 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
-
-define void @i(ptr %ad, ptr %0) #0 {
-; CHECK-LABEL: i:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str d11, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    add x29, sp, #16
-; CHECK-NEXT:    stp x28, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w28, -16
-; CHECK-NEXT:    .cfi_offset w30, -24
-; CHECK-NEXT:    .cfi_offset w29, -32
-; CHECK-NEXT:    .cfi_offset b11, -48
-; CHECK-NEXT:    //APP
-; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:  // %bb.1: // %asm.fallthrough
-; CHECK-NEXT:  .LBB0_2: // Inline asm indirect target
-; CHECK-NEXT:    // %ah.preheader.preheader
-; CHECK-NEXT:    // Label of block must be emitted
-; CHECK-NEXT:    mov x8, #-35417 // =0xffffffffffff75a7
-; CHECK-NEXT:    mov x9, #35417 // =0x8a59
-; CHECK-NEXT:    mov w19, #1 // =0x1
-; CHECK-NEXT:    movk x8, #29436, lsl #16
-; CHECK-NEXT:    movk x9, #36099, lsl #16
-; CHECK-NEXT:    stp x1, x0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    movk x8, #64591, lsl #32
-; CHECK-NEXT:    movk x9, #944, lsl #32
-; CHECK-NEXT:    index z0.d, x9, x8
-; CHECK-NEXT:    sub x8, x29, #16
-; CHECK-NEXT:    str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT:  .LBB0_3: // Inline asm indirect target
-; CHECK-NEXT:    // %ah
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    // Label of block must be emitted
-; CHECK-NEXT:    sub x9, x29, #16
-; CHECK-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT:    str d0, [x8]
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    sub x8, x29, #16
-; CHECK-NEXT:    str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT:    //APP
-; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    //APP
-; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:  // %bb.4: // %asm.fallthrough2
-; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    bl g
-; CHECK-NEXT:    add x8, sp, #28
-; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    ldp x10, x8, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    strb w19, [x10]
-; CHECK-NEXT:    str w9, [x8]
-; CHECK-NEXT:    //APP
-; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    b .LBB0_3
-entry:
-  %aj = alloca i32, align 4
-  callbr void asm sideeffect "", "!i,!i"()
-          to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
-
-ah.preheader.preheader:                           ; preds = %entry, %entry
-  %conv = xor i8 0, 1
-  br label %ah
-
-asm.fallthrough:                                  ; preds = %entry
-  unreachable
-
-ah:                                               ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
-  %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
-  %vecext = extractelement <8 x i64> %af.2, i64 0
-  store i64 %vecext, ptr %ad, align 8
-  call void asm sideeffect "", "~{v11}"()
-  callbr void asm sideeffect "", "!i"()
-          to label %asm.fallthrough2 [label %ah]
-
-asm.fallthrough2:                                 ; preds = %ah
-  %call = call i32 @g()
-  store i8 %conv, ptr %0, align 1
-  %cmp = icmp ne ptr %aj, null
-  %conv3 = zext i1 %cmp to i32
-  store i32 %conv3, ptr %ad, align 4
-  callbr void asm sideeffect "", "!i"()
-          to label %ah [label %ah]
-}
-
-declare i32 @g(...)
-
-attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir
new file mode 100644
index 0000000000000..6e7938709c3e1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.mir
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s
+--- |
+  define void @i(ptr %ad, ptr %0) #0 {
+  entry:
+    ret void
+  }
+  declare i32 @g(...)
+  attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
+...
+---
+name:            i
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          true
+isSSA:           false
+noVRegs:         true
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    16
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -4, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+      stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: i
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000), %bb.1(0x00000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1, $d11, $lr, $x19, $x28
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8)
+  ; CHECK-NEXT:   frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
+  ; CHECK-NEXT:   frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
+  ; CHECK-NEXT:   $fp = frame-setup ADDXri $sp, 16, 0
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 16, 0
+  ; CHECK-NEXT:   $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w19, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w28, -16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w30, -24
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w29, -32
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $b11, -48
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target):
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 1
+  ; CHECK-NEXT:   B %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target):
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000), %bb.3(0x00000000)
+  ; CHECK-NEXT:   liveins: $w19
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+  ; CHECK-NEXT:   B %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $w19
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+  ; CHECK-NEXT:   $x8 = ADDXri $sp, 28, 0
+  ; CHECK-NEXT:   $x8 = ADDVL_XXI $x8, 1, implicit $vg
+  ; CHECK-NEXT:   $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   B %bb.3
+  bb.0:
+    successors: %bb.2(0x80000000), %bb.1(0x00000000)
+    liveins: $x0, $x1
+
+    B %bb.2
+
+  bb.1 (inlineasm-br-indirect-target):
+    successors: %bb.3(0x80000000)
+    liveins: $x0, $x1
+
+    renamable $w19 = MOVi32imm 1
+    B %bb.3
+
+  bb.2:
+    successors:
+
+  bb.3 (inlineasm-br-indirect-target):
+    successors: %bb.4(0x80000000), %bb.3(0x00000000)
+    liveins: $w19
+
+    INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+    B %bb.4
+
+  bb.4:
+    successors: %bb.3(0x80000000)
+    liveins: $w19
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
+    B %bb.3
+...

>From 18c3c7b7e0190773007efc9b9a9482df1df62cb6 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:53:03 +0800
Subject: [PATCH 3/7] format

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 17a9999495602..26773fbb48e41 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6605,10 +6605,9 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 
   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
     Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
-    emitFrameOffset(*NewMI->getParent(), *NewMI,
-                    NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(),
-                    FrameReg, Offset, TII, MachineInstr::NoFlags,
-                    (Opcode == AArch64::ADDSXri));
+    emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(),
+                    NewMI->getOperand(0).getReg(), FrameReg, Offset, TII,
+                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
     NewMI->eraseFromParent();
     Offset = StackOffset();
     return true;

>From a4b824a101d310b36cc62992ee802514623df1a6 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:53:28 +0800
Subject: [PATCH 4/7] Remove debugging

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 26773fbb48e41..3f468734a9c2f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6587,7 +6587,6 @@ static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
       .addImm(MI.getOperand(3).getImm());
 
   MI.eraseFromParent();
-  Unfolded->getParent()->dump();
   return Unfolded;
 }
 

>From f2fa5b4de87e1c43fd172a42ca7af1c9706c73ba Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 13:16:09 +0800
Subject: [PATCH 5/7] rename

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3f468734a9c2f..0ee6dd665df57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6569,7 +6569,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
 //  -->
 //    add %dest, %stack, 0
 //    adds %dest, %dest, c
-static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
+static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
                                   const AArch64InstrInfo *TII) {
   auto *MBB = MI.getParent();
   Register DestReg = MI.getOperand(0).getReg();
@@ -6598,7 +6598,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 
   MachineInstr *NewMI = &MI;
   if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
-    NewMI = unfoldAddXri(MI, FrameReg, TII);
+    NewMI = unfoldAddSXri(MI, FrameReg, TII);
     Opcode = AArch64::ADDXri;
   }
 

>From 66b32561e8bba3408fd7eb9a73a3bd68c672edd5 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 15:52:49 +0800
Subject: [PATCH 6/7] format

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0ee6dd665df57..3305f187976e9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6570,7 +6570,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
 //    add %dest, %stack, 0
 //    adds %dest, %dest, c
 static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
-                                  const AArch64InstrInfo *TII) {
+                                   const AArch64InstrInfo *TII) {
   auto *MBB = MI.getParent();
   Register DestReg = MI.getOperand(0).getReg();
 

>From ebbd0ce7a1a804e4e4a23f59bd96fef6236e36e3 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 21:06:12 +0800
Subject: [PATCH 7/7] resolve comments

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp |  61 ++++------
 llvm/test/CodeGen/AArch64/pr157252.mir       | 111 +++++--------------
 2 files changed, 49 insertions(+), 123 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3305f187976e9..f06c2030f20b7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6263,6 +6263,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
       Offset, Bytes, NumPredicateVectors, NumDataVectors);
 
+  // Insert ADDSXri for scalable offset at the end.
+  bool NeedInsertADDS = SetNZCV && (NumPredicateVectors || NumDataVectors);
+  if (NeedInsertADDS)
+    SetNZCV = false;
+
   // First emit non-scalable frame offsets, or a simple 'mov'.
   if (Bytes || (!Offset && SrcReg != DestReg)) {
     assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
@@ -6282,8 +6287,6 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
     FrameReg = DestReg;
   }
 
-  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
-         "SetNZCV not supported with SVE vectors");
   assert(!(NeedsWinCFI && NumPredicateVectors) &&
          "WinCFI can't allocate fractions of an SVE data vector");
 
@@ -6303,6 +6306,12 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                        Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                        FrameReg);
   }
+
+  if (NeedInsertADDS)
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
+        .addReg(DestReg)
+        .addImm(0)
+        .addImm(0);
 }
 
 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
@@ -6564,50 +6573,18 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
 }
 
-// Unfold ADDSXri:
-//    adds %dest, %stack, c
-//  -->
-//    add %dest, %stack, 0
-//    adds %dest, %dest, c
-static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
-                                   const AArch64InstrInfo *TII) {
-  auto *MBB = MI.getParent();
-  Register DestReg = MI.getOperand(0).getReg();
-
-  auto *Unfolded =
-      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg)
-          .addReg(FrameReg)
-          .addImm(0)
-          .addImm(0)
-          .getInstr();
-
-  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg)
-      .addReg(DestReg)
-      .addImm(MI.getOperand(2).getImm())
-      .addImm(MI.getOperand(3).getImm());
-
-  MI.eraseFromParent();
-  return Unfolded;
-}
-
 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                     unsigned FrameReg, StackOffset &Offset,
                                     const AArch64InstrInfo *TII) {
   unsigned Opcode = MI.getOpcode();
   unsigned ImmIdx = FrameRegIdx + 1;
 
-  MachineInstr *NewMI = &MI;
-  if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
-    NewMI = unfoldAddSXri(MI, FrameReg, TII);
-    Opcode = AArch64::ADDXri;
-  }
-
   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
-    Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
-    emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(),
-                    NewMI->getOperand(0).getReg(), FrameReg, Offset, TII,
+    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
+    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
+                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
-    NewMI->eraseFromParent();
+    MI.eraseFromParent();
     Offset = StackOffset();
     return true;
   }
@@ -6615,16 +6592,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   int64_t NewOffset;
   unsigned UnscaledOp;
   bool UseUnscaledOp;
-  int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp,
+  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                          &UnscaledOp, &NewOffset);
   if (Status & AArch64FrameOffsetCanUpdate) {
     if (Status & AArch64FrameOffsetIsLegal)
       // Replace the FrameIndex with FrameReg.
-      NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
     if (UseUnscaledOp)
-      NewMI->setDesc(TII->get(UnscaledOp));
+      MI.setDesc(TII->get(UnscaledOp));
 
-    NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset);
+    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
     return !Offset;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir
index 6e7938709c3e1..b1956cc83a434 100644
--- a/llvm/test/CodeGen/AArch64/pr157252.mir
+++ b/llvm/test/CodeGen/AArch64/pr157252.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -o - %s | FileCheck %s
 --- |
   define void @i(ptr %ad, ptr %0) #0 {
   entry:
@@ -59,12 +59,6 @@ stack:
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       local-offset: -4, debug-info-variable: '', debug-info-expression: '',
       debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
   - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
       stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
@@ -74,81 +68,36 @@ debugValueSubstitutions: []
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: i
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000), %bb.1(0x00000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1, $d11, $lr, $x19, $x28
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8)
-  ; CHECK-NEXT:   frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
-  ; CHECK-NEXT:   frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
-  ; CHECK-NEXT:   $fp = frame-setup ADDXri $sp, 16, 0
-  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 16, 0
-  ; CHECK-NEXT:   $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $w29, 32
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w19, -8
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w28, -16
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w30, -24
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w29, -32
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $b11, -48
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target):
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 1
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target):
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000), %bb.3(0x00000000)
-  ; CHECK-NEXT:   liveins: $w19
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $w19
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   $x8 = ADDXri $sp, 28, 0
-  ; CHECK-NEXT:   $x8 = ADDVL_XXI $x8, 1, implicit $vg
-  ; CHECK-NEXT:   $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   B %bb.3
   bb.0:
-    successors: %bb.2(0x80000000), %bb.1(0x00000000)
-    liveins: $x0, $x1
-
-    B %bb.2
-
-  bb.1 (inlineasm-br-indirect-target):
-    successors: %bb.3(0x80000000)
-    liveins: $x0, $x1
-
-    renamable $w19 = MOVi32imm 1
-    B %bb.3
-
-  bb.2:
-    successors:
-
-  bb.3 (inlineasm-br-indirect-target):
-    successors: %bb.4(0x80000000), %bb.3(0x00000000)
-    liveins: $w19
-
+    ; CHECK-LABEL: name: i
+    ; CHECK: liveins: $d11, $lr, $x20, $x28
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.6)
+    ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
+    ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x20, $sp, 4 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
+    ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0
+    ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
+    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48
+    ; CHECK-NEXT: $w20 = MOVi32imm 1
+    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+    ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    ; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0
+    ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
+    ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
+    ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+    ; CHECK-NEXT: $x28, $x20 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
+    ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
+    ; CHECK-NEXT: early-clobber $sp, $d11 = frame-destroy LDRDpost $sp, 48 :: (load (s64) from %stack.6)
+    ; CHECK-NEXT: RET_ReallyLR
+    $w20 = MOVi32imm 1
     INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
-    B %bb.4
-
-  bb.4:
-    successors: %bb.3(0x80000000)
-    liveins: $w19
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
     BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
-    B %bb.3
+    $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
+    RET_ReallyLR
 ...
+



More information about the llvm-commits mailing list