[llvm] [AArch64] Unfold adds when eliminating frame index with scalable offset (PR #158597)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 06:06:37 PDT 2025
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/158597
>From 27c6b10a95f3056e24ae5381f21233447c1816f3 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 15 Sep 2025 18:16:50 +0800
Subject: [PATCH 1/7] [AArch64] Avoid applying the S-form to a frame index in peephole
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 5 +
llvm/test/CodeGen/AArch64/pr157252.ll | 96 ++++++++++++++++++++
2 files changed, 101 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e56fe90259d5c..2c09710831808 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1920,6 +1920,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
CmpInstr.getOperand(2).getImm() == 0) &&
"Caller guarantees that CmpInstr compares with constant 0");
+ // NZCV is not supported if the stack offset is scalable.
+ auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
+ if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
+ return false;
+
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
if (!NZVCUsed || NZVCUsed->C)
return false;
diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll
new file mode 100644
index 0000000000000..c3b296a795157
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @i(ptr %ad, ptr %0) #0 {
+; CHECK-LABEL: i:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d11, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w28, -16
+; CHECK-NEXT: .cfi_offset w30, -24
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: // %bb.1: // %asm.fallthrough
+; CHECK-NEXT: .LBB0_2: // Inline asm indirect target
+; CHECK-NEXT: // %ah.preheader.preheader
+; CHECK-NEXT: // Label of block must be emitted
+; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7
+; CHECK-NEXT: mov x9, #35417 // =0x8a59
+; CHECK-NEXT: mov w19, #1 // =0x1
+; CHECK-NEXT: movk x8, #29436, lsl #16
+; CHECK-NEXT: movk x9, #36099, lsl #16
+; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: movk x8, #64591, lsl #32
+; CHECK-NEXT: movk x9, #944, lsl #32
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .LBB0_3: // Inline asm indirect target
+; CHECK-NEXT: // %ah
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: // Label of block must be emitted
+; CHECK-NEXT: sub x9, x29, #16
+; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: // %bb.4: // %asm.fallthrough2
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: add x8, sp, #28
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: ldp x10, x8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: strb w19, [x10]
+; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: b .LBB0_3
+entry:
+ %aj = alloca i32, align 4
+ callbr void asm sideeffect "", "!i,!i"()
+ to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
+
+ah.preheader.preheader: ; preds = %entry, %entry
+ %conv = xor i8 0, 1
+ br label %ah
+
+asm.fallthrough: ; preds = %entry
+ unreachable
+
+ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
+ %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
+ %vecext = extractelement <8 x i64> %af.2, i64 0
+ store i64 %vecext, ptr %ad, align 8
+ call void asm sideeffect "", "~{v11}"()
+ callbr void asm sideeffect "", "!i"()
+ to label %asm.fallthrough2 [label %ah]
+
+asm.fallthrough2: ; preds = %ah
+ %call = call i32 @g()
+ store i8 %conv, ptr %0, align 1
+ %cmp = icmp ne ptr %aj, null
+ %conv3 = zext i1 %cmp to i32
+ store i32 %conv3, ptr %ad, align 4
+ callbr void asm sideeffect "", "!i"()
+ to label %ah [label %ah]
+}
+
+declare i32 @g(...)
+
+attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
>From a30b328fbbe1c055c97bfcb40aed7d1fb88417ba Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:48:50 +0800
Subject: [PATCH 2/7] Unfold adds
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 57 +++++--
llvm/test/CodeGen/AArch64/pr157252.ll | 96 ------------
llvm/test/CodeGen/AArch64/pr157252.mir | 154 +++++++++++++++++++
3 files changed, 197 insertions(+), 110 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll
create mode 100644 llvm/test/CodeGen/AArch64/pr157252.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 2c09710831808..17a9999495602 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1920,11 +1920,6 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
CmpInstr.getOperand(2).getImm() == 0) &&
"Caller guarantees that CmpInstr compares with constant 0");
- // NZCV is not supported if the stack offset is scalable.
- auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
- if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
- return false;
-
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
if (!NZVCUsed || NZVCUsed->C)
return false;
@@ -6569,18 +6564,52 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
+// Unfold ADDSXri:
+// adds %dest, %stack, c
+// -->
+// add %dest, %stack, 0
+// adds %dest, %dest, c
+static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
+ const AArch64InstrInfo *TII) {
+ auto *MBB = MI.getParent();
+ Register DestReg = MI.getOperand(0).getReg();
+
+ auto *Unfolded =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg)
+ .addReg(FrameReg)
+ .addImm(0)
+ .addImm(0)
+ .getInstr();
+
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg)
+ .addReg(DestReg)
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm());
+
+ MI.eraseFromParent();
+ Unfolded->getParent()->dump();
+ return Unfolded;
+}
+
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
unsigned ImmIdx = FrameRegIdx + 1;
+ MachineInstr *NewMI = &MI;
+ if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
+ NewMI = unfoldAddXri(MI, FrameReg, TII);
+ Opcode = AArch64::ADDXri;
+ }
+
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
- emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
- MI.getOperand(0).getReg(), FrameReg, Offset, TII,
- MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
- MI.eraseFromParent();
+ Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
+ emitFrameOffset(*NewMI->getParent(), *NewMI,
+ NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(),
+ FrameReg, Offset, TII, MachineInstr::NoFlags,
+ (Opcode == AArch64::ADDSXri));
+ NewMI->eraseFromParent();
Offset = StackOffset();
return true;
}
@@ -6588,16 +6617,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
int64_t NewOffset;
unsigned UnscaledOp;
bool UseUnscaledOp;
- int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
+ int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp,
&UnscaledOp, &NewOffset);
if (Status & AArch64FrameOffsetCanUpdate) {
if (Status & AArch64FrameOffsetIsLegal)
// Replace the FrameIndex with FrameReg.
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (UseUnscaledOp)
- MI.setDesc(TII->get(UnscaledOp));
+ NewMI->setDesc(TII->get(UnscaledOp));
- MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
+ NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset);
return !Offset;
}
diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll
deleted file mode 100644
index c3b296a795157..0000000000000
--- a/llvm/test/CodeGen/AArch64/pr157252.ll
+++ /dev/null
@@ -1,96 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
-
-define void @i(ptr %ad, ptr %0) #0 {
-; CHECK-LABEL: i:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d11, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: add x29, sp, #16
-; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: .cfi_def_cfa w29, 32
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w28, -16
-; CHECK-NEXT: .cfi_offset w30, -24
-; CHECK-NEXT: .cfi_offset w29, -32
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: //APP
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: // %bb.1: // %asm.fallthrough
-; CHECK-NEXT: .LBB0_2: // Inline asm indirect target
-; CHECK-NEXT: // %ah.preheader.preheader
-; CHECK-NEXT: // Label of block must be emitted
-; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7
-; CHECK-NEXT: mov x9, #35417 // =0x8a59
-; CHECK-NEXT: mov w19, #1 // =0x1
-; CHECK-NEXT: movk x8, #29436, lsl #16
-; CHECK-NEXT: movk x9, #36099, lsl #16
-; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: movk x8, #64591, lsl #32
-; CHECK-NEXT: movk x9, #944, lsl #32
-; CHECK-NEXT: index z0.d, x9, x8
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .LBB0_3: // Inline asm indirect target
-; CHECK-NEXT: // %ah
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: // Label of block must be emitted
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT: str d0, [x8]
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: //APP
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: //APP
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: // %bb.4: // %asm.fallthrough2
-; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: bl g
-; CHECK-NEXT: add x8, sp, #28
-; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: ldp x10, x8, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: cset w9, ne
-; CHECK-NEXT: strb w19, [x10]
-; CHECK-NEXT: str w9, [x8]
-; CHECK-NEXT: //APP
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: b .LBB0_3
-entry:
- %aj = alloca i32, align 4
- callbr void asm sideeffect "", "!i,!i"()
- to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
-
-ah.preheader.preheader: ; preds = %entry, %entry
- %conv = xor i8 0, 1
- br label %ah
-
-asm.fallthrough: ; preds = %entry
- unreachable
-
-ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
- %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
- %vecext = extractelement <8 x i64> %af.2, i64 0
- store i64 %vecext, ptr %ad, align 8
- call void asm sideeffect "", "~{v11}"()
- callbr void asm sideeffect "", "!i"()
- to label %asm.fallthrough2 [label %ah]
-
-asm.fallthrough2: ; preds = %ah
- %call = call i32 @g()
- store i8 %conv, ptr %0, align 1
- %cmp = icmp ne ptr %aj, null
- %conv3 = zext i1 %cmp to i32
- store i32 %conv3, ptr %ad, align 4
- callbr void asm sideeffect "", "!i"()
- to label %ah [label %ah]
-}
-
-declare i32 @g(...)
-
-attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir
new file mode 100644
index 0000000000000..6e7938709c3e1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.mir
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s
+--- |
+ define void @i(ptr %ad, ptr %0) #0 {
+ entry:
+ ret void
+ }
+ declare i32 @g(...)
+ attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
+...
+---
+name: i
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: false
+noVRegs: true
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: true
+registers: []
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 16
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: i
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
+ ; CHECK-NEXT: liveins: $x0, $x1, $d11, $lr, $x19, $x28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8)
+ ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
+ ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+ ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target):
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $w19 = MOVi32imm 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target):
+ ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000)
+ ; CHECK-NEXT: liveins: $w19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $w19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: $x8 = ADDXri $sp, 28, 0
+ ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
+ ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: B %bb.3
+ bb.0:
+ successors: %bb.2(0x80000000), %bb.1(0x00000000)
+ liveins: $x0, $x1
+
+ B %bb.2
+
+ bb.1 (inlineasm-br-indirect-target):
+ successors: %bb.3(0x80000000)
+ liveins: $x0, $x1
+
+ renamable $w19 = MOVi32imm 1
+ B %bb.3
+
+ bb.2:
+ successors:
+
+ bb.3 (inlineasm-br-indirect-target):
+ successors: %bb.4(0x80000000), %bb.3(0x00000000)
+ liveins: $w19
+
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+ B %bb.4
+
+ bb.4:
+ successors: %bb.3(0x80000000)
+ liveins: $w19
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
+ B %bb.3
+...
>From 18c3c7b7e0190773007efc9b9a9482df1df62cb6 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:53:03 +0800
Subject: [PATCH 3/7] format
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 17a9999495602..26773fbb48e41 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6605,10 +6605,9 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
- emitFrameOffset(*NewMI->getParent(), *NewMI,
- NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(),
- FrameReg, Offset, TII, MachineInstr::NoFlags,
- (Opcode == AArch64::ADDSXri));
+ emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(),
+ NewMI->getOperand(0).getReg(), FrameReg, Offset, TII,
+ MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
NewMI->eraseFromParent();
Offset = StackOffset();
return true;
>From a4b824a101d310b36cc62992ee802514623df1a6 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 01:53:28 +0800
Subject: [PATCH 4/7] Remove debugging
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 26773fbb48e41..3f468734a9c2f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6587,7 +6587,6 @@ static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
.addImm(MI.getOperand(3).getImm());
MI.eraseFromParent();
- Unfolded->getParent()->dump();
return Unfolded;
}
>From f2fa5b4de87e1c43fd172a42ca7af1c9706c73ba Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 13:16:09 +0800
Subject: [PATCH 5/7] rename
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3f468734a9c2f..0ee6dd665df57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6569,7 +6569,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// -->
// add %dest, %stack, 0
// adds %dest, %dest, c
-static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
+static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
const AArch64InstrInfo *TII) {
auto *MBB = MI.getParent();
Register DestReg = MI.getOperand(0).getReg();
@@ -6598,7 +6598,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MachineInstr *NewMI = &MI;
if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
- NewMI = unfoldAddXri(MI, FrameReg, TII);
+ NewMI = unfoldAddSXri(MI, FrameReg, TII);
Opcode = AArch64::ADDXri;
}
>From 66b32561e8bba3408fd7eb9a73a3bd68c672edd5 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 15:52:49 +0800
Subject: [PATCH 6/7] format
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0ee6dd665df57..3305f187976e9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6570,7 +6570,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// add %dest, %stack, 0
// adds %dest, %dest, c
static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
- const AArch64InstrInfo *TII) {
+ const AArch64InstrInfo *TII) {
auto *MBB = MI.getParent();
Register DestReg = MI.getOperand(0).getReg();
>From ebbd0ce7a1a804e4e4a23f59bd96fef6236e36e3 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 21:06:12 +0800
Subject: [PATCH 7/7] resolve comments
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 61 ++++------
llvm/test/CodeGen/AArch64/pr157252.mir | 111 +++++--------------
2 files changed, 49 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3305f187976e9..f06c2030f20b7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6263,6 +6263,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
Offset, Bytes, NumPredicateVectors, NumDataVectors);
+ // Insert ADDSXri for scalable offset at the end.
+ bool NeedInsertADDS = SetNZCV && (NumPredicateVectors || NumDataVectors);
+ if (NeedInsertADDS)
+ SetNZCV = false;
+
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
@@ -6282,8 +6287,6 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
FrameReg = DestReg;
}
- assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
- "SetNZCV not supported with SVE vectors");
assert(!(NeedsWinCFI && NumPredicateVectors) &&
"WinCFI can't allocate fractions of an SVE data vector");
@@ -6303,6 +6306,12 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
FrameReg);
}
+
+ if (NeedInsertADDS)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
+ .addReg(DestReg)
+ .addImm(0)
+ .addImm(0);
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
@@ -6564,50 +6573,18 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
-// Unfold ADDSXri:
-// adds %dest, %stack, c
-// -->
-// add %dest, %stack, 0
-// adds %dest, %dest, c
-static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg,
- const AArch64InstrInfo *TII) {
- auto *MBB = MI.getParent();
- Register DestReg = MI.getOperand(0).getReg();
-
- auto *Unfolded =
- BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg)
- .addReg(FrameReg)
- .addImm(0)
- .addImm(0)
- .getInstr();
-
- BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg)
- .addReg(DestReg)
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm());
-
- MI.eraseFromParent();
- return Unfolded;
-}
-
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
unsigned ImmIdx = FrameRegIdx + 1;
- MachineInstr *NewMI = &MI;
- if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
- NewMI = unfoldAddSXri(MI, FrameReg, TII);
- Opcode = AArch64::ADDXri;
- }
-
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
- emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(),
- NewMI->getOperand(0).getReg(), FrameReg, Offset, TII,
+ Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
+ emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
- NewMI->eraseFromParent();
+ MI.eraseFromParent();
Offset = StackOffset();
return true;
}
@@ -6615,16 +6592,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
int64_t NewOffset;
unsigned UnscaledOp;
bool UseUnscaledOp;
- int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp,
+ int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
&UnscaledOp, &NewOffset);
if (Status & AArch64FrameOffsetCanUpdate) {
if (Status & AArch64FrameOffsetIsLegal)
// Replace the FrameIndex with FrameReg.
- NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (UseUnscaledOp)
- NewMI->setDesc(TII->get(UnscaledOp));
+ MI.setDesc(TII->get(UnscaledOp));
- NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset);
+ MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
return !Offset;
}
diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir
index 6e7938709c3e1..b1956cc83a434 100644
--- a/llvm/test/CodeGen/AArch64/pr157252.mir
+++ b/llvm/test/CodeGen/AArch64/pr157252.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -o - %s | FileCheck %s
--- |
define void @i(ptr %ad, ptr %0) #0 {
entry:
@@ -59,12 +59,6 @@ stack:
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
@@ -74,81 +68,36 @@ debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
- ; CHECK-LABEL: name: i
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
- ; CHECK-NEXT: liveins: $x0, $x1, $d11, $lr, $x19, $x28
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8)
- ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
- ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
- ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0
- ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
- ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48
- ; CHECK-NEXT: B %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target):
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $x0, $x1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w19 = MOVi32imm 1
- ; CHECK-NEXT: B %bb.3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors:
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target):
- ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000)
- ; CHECK-NEXT: liveins: $w19
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
- ; CHECK-NEXT: B %bb.4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $w19
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
- ; CHECK-NEXT: $x8 = ADDXri $sp, 28, 0
- ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
- ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
- ; CHECK-NEXT: B %bb.3
bb.0:
- successors: %bb.2(0x80000000), %bb.1(0x00000000)
- liveins: $x0, $x1
-
- B %bb.2
-
- bb.1 (inlineasm-br-indirect-target):
- successors: %bb.3(0x80000000)
- liveins: $x0, $x1
-
- renamable $w19 = MOVi32imm 1
- B %bb.3
-
- bb.2:
- successors:
-
- bb.3 (inlineasm-br-indirect-target):
- successors: %bb.4(0x80000000), %bb.3(0x00000000)
- liveins: $w19
-
+ ; CHECK-LABEL: name: i
+ ; CHECK: liveins: $d11, $lr, $x20, $x28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.6)
+ ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
+ ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x20, $sp, 4 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0
+ ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48
+ ; CHECK-NEXT: $w20 = MOVi32imm 1
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0
+ ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
+ ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+ ; CHECK-NEXT: $x28, $x20 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
+ ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
+ ; CHECK-NEXT: early-clobber $sp, $d11 = frame-destroy LDRDpost $sp, 48 :: (load (s64) from %stack.6)
+ ; CHECK-NEXT: RET_ReallyLR
+ $w20 = MOVi32imm 1
INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
- B %bb.4
-
- bb.4:
- successors: %bb.3(0x80000000)
- liveins: $w19
-
- ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
- ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
- dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
- B %bb.3
+ $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
+ RET_ReallyLR
...
+
More information about the llvm-commits mailing list