[llvm] [AArch64] Avoid apply S-form on frame index in peephole (PR #158597)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 15 03:33:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Hongyu Chen (XChy)
<details>
<summary>Changes</summary>
Fixes https://github.com/llvm/llvm-project/issues/157252.
Peephole optimization tends to fold:
```
add %gpr1, %stack, 0
subs %gpr2, %gpr1, 0
```
to
```
adds %gpr2, %stack, 0
```
This is illegal if the stack offset is scalable. This patch disallows such an optimization.
An alternative would be to expand the S-form in reverse in `emitFrameOffset`.
---
Full diff: https://github.com/llvm/llvm-project/pull/158597.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+5)
- (added) llvm/test/CodeGen/AArch64/pr157252.ll (+96)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e56fe90259d5c..2c09710831808 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1920,6 +1920,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
CmpInstr.getOperand(2).getImm() == 0) &&
"Caller guarantees that CmpInstr compares with constant 0");
+ // NZCV is not supported if the stack offset is scalable.
+ auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
+ if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
+ return false;
+
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
if (!NZVCUsed || NZVCUsed->C)
return false;
diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll
new file mode 100644
index 0000000000000..c3b296a795157
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr157252.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @i(ptr %ad, ptr %0) #0 {
+; CHECK-LABEL: i:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d11, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w28, -16
+; CHECK-NEXT: .cfi_offset w30, -24
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: // %bb.1: // %asm.fallthrough
+; CHECK-NEXT: .LBB0_2: // Inline asm indirect target
+; CHECK-NEXT: // %ah.preheader.preheader
+; CHECK-NEXT: // Label of block must be emitted
+; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7
+; CHECK-NEXT: mov x9, #35417 // =0x8a59
+; CHECK-NEXT: mov w19, #1 // =0x1
+; CHECK-NEXT: movk x8, #29436, lsl #16
+; CHECK-NEXT: movk x9, #36099, lsl #16
+; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: movk x8, #64591, lsl #32
+; CHECK-NEXT: movk x9, #944, lsl #32
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .LBB0_3: // Inline asm indirect target
+; CHECK-NEXT: // %ah
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: // Label of block must be emitted
+; CHECK-NEXT: sub x9, x29, #16
+; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: // %bb.4: // %asm.fallthrough2
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: add x8, sp, #28
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: ldp x10, x8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: strb w19, [x10]
+; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: b .LBB0_3
+entry:
+ %aj = alloca i32, align 4
+ callbr void asm sideeffect "", "!i,!i"()
+ to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
+
+ah.preheader.preheader: ; preds = %entry, %entry
+ %conv = xor i8 0, 1
+ br label %ah
+
+asm.fallthrough: ; preds = %entry
+ unreachable
+
+ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
+ %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
+ %vecext = extractelement <8 x i64> %af.2, i64 0
+ store i64 %vecext, ptr %ad, align 8
+ call void asm sideeffect "", "~{v11}"()
+ callbr void asm sideeffect "", "!i"()
+ to label %asm.fallthrough2 [label %ah]
+
+asm.fallthrough2: ; preds = %ah
+ %call = call i32 @g()
+ store i8 %conv, ptr %0, align 1
+ %cmp = icmp ne ptr %aj, null
+ %conv3 = zext i1 %cmp to i32
+ store i32 %conv3, ptr %ad, align 4
+ callbr void asm sideeffect "", "!i"()
+ to label %ah [label %ah]
+}
+
+declare i32 @g(...)
+
+attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/158597
More information about the llvm-commits
mailing list