[llvm] [CodeGen] Avoid aligning alloca size. (PR #132064)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 10:21:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (jcogan-nv)
<details>
<summary>Changes</summary>
GlobalISel and SelectionDAG will emit an ADD and an AND to pad the size of a dynamic `alloca` to a multiple of the stack alignment. When the alignment for the `alloca` is stricter than the stack alignment, and the stack grows down, this is not necessary because we will already clamp the stack pointer to the `alloca`'s alignment after subtracting the size (for example, see `LegalizerHelper::getDynStackAllocTargetPtr`). This change avoids padding the size of `alloca` when it is not necessary.
---
Patch is 30.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132064.diff
22 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+31-13)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+27-16)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll (+1-5)
- (modified) llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll (+1-5)
- (modified) llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll (+4-10)
- (modified) llvm/test/CodeGen/AArch64/sve-alloca.ll (+1-3)
- (modified) llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll (+2-7)
- (modified) llvm/test/CodeGen/PowerPC/pr46759.ll (-3)
- (modified) llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll (+4-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll (+2-10)
- (modified) llvm/test/CodeGen/RISCV/stack-clash-prologue.ll (+1-5)
- (modified) llvm/test/CodeGen/SPARC/alloca-align.ll (+2-12)
- (modified) llvm/test/CodeGen/SPARC/stack-align.ll (+2-2)
- (modified) llvm/test/CodeGen/SystemZ/alloca-03.ll (+1-1)
- (modified) llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll (+4-5)
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_align.ll (+6-8)
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_call.ll (-8)
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll (-8)
- (modified) llvm/test/CodeGen/X86/clobber_base_ptr.ll (-4)
- (modified) llvm/test/CodeGen/X86/pr50782.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll (+7-11)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b85239ebf08cb..d55096af8836c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U,
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
- auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
- auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
- MachineInstr::NoUWrap);
- auto AlignCst =
- MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
- auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
-
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ Align StackAlign = TFI->getStackAlign();
Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
- if (Alignment <= StackAlign)
+
+ // If the stack alignment is stricter than the alloca's alignment, ignore the
+ // alloca's alignment. We will align the size of the alloca to the stack
+ // alignment, which will guarantee that the alloca's alignment is satisfied.
+ bool IsUnderAligned = Alignment <= StackAlign;
+ if (IsUnderAligned)
Alignment = Align(1);
- MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+
+ // If the stack grows up, adding the alloca's size to SP without padding may
+ // leave SP not aligned (to the stack alignment) after the alloca because we
+ // align SP (to the stack align or alloca align) *before* adding the alloca
+ // size. On the other hand, if the stack grows down, we will align SP *after*
+ // decrementing it, so there is no need to pad the size.
+ if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+ IsUnderAligned) {
+ // Round the size of the allocation up to the stack alignment size
+ // by add SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ auto SAMinusOne =
+ MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
+ auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+ MachineInstr::NoUWrap);
+ auto AlignCst =
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
+ auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
+
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+ } else {
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment);
+ }
MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
assert(MF->getFrameInfo().hasVarSizedObjects());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 14bb1d943d2d6..fe1a8bab30e54 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
}
- // Handle alignment. If the requested alignment is less than or equal to
- // the stack alignment, ignore it. If the size is greater than or equal to
- // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
- if (*Alignment <= StackAlign)
+ // Handle alignment. If the requested alignment is less than or equal to the
+ // stack alignment, ignore it since we will align the size. If the size is
+ // greater than or equal to the stack alignment, we note this in the
+ // DYNAMIC_STACKALLOC node.
+ const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
+ Align StackAlign = TFI->getStackAlign();
+ bool IsUnderAligned = *Alignment <= StackAlign;
+ if (IsUnderAligned)
Alignment = std::nullopt;
- const uint64_t StackAlignMask = StackAlign.value() - 1U;
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(StackAlignMask, dl, IntPtr),
- SDNodeFlags::NoUnsignedWrap);
-
- // Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
- DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+ // If the stack grows up, adding the alloca's size to SP without padding may
+ // leave SP not aligned (to the stack alignment) after the alloca because we
+ // align SP (to the stack align or alloca align) *before* adding the alloca
+ // size. On the other hand, if the stack grows down, we will align SP *after*
+ // decrementing it, so there is no need to align the size.
+ if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+ IsUnderAligned) {
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
+ // Round the size of the allocation up to the stack alignment size
+ // by add SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlignMask, dl, IntPtr),
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+ }
SDValue Ops[] = {
getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
index 88eaa1382d1d6..0f74b51262339 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
@@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) {
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
- ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32
+ ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32
; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0)
; CHECK: RET_ReallyLR implicit $x0
%addr = alloca i8, i32 %numelts, align 32
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index f49bb910b5bd1..85c6b2a2fb854 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,11 +160,7 @@ define void @quux() #1 {
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: addvl x9, x8, #2
-; CHECK-NEXT: mov w0, w9
-; CHECK-NEXT: // implicit-def: $x9
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: and x9, x9, #0x7f0
+; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index e7687f0d3994b..b32eb210ce0e7 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub x9, sp, #32
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-DAG: str xzr, [sp]
-; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT: str xzr, [sp]
; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x9
+; CHECK-DAG: sub x8, x8, x0
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-DAG: ldr xzr, [sp]
-; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT: ldr xzr, [sp]
; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x9
+; CHECK-DAG: sub x8, x8, x0
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 2520095cce62e..3d3c5da483015 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,10 +54,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: add x9, x9, #15
-; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT: cnth x9, all, mul #4
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 44281bcc3647d..8895a9a920569 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX32-NEXT: slwi 3, 3, 2
; AIX32-NEXT: lwz 4, 0(1)
; AIX32-NEXT: li 5, -64
-; AIX32-NEXT: addi 3, 3, 15
-; AIX32-NEXT: mr 31, 1
-; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27
; AIX32-NEXT: neg 3, 3
+; AIX32-NEXT: mr 31, 1
; AIX32-NEXT: and 5, 3, 5
; AIX32-NEXT: stwux 4, 1, 5
; AIX32-NEXT: addi 3, 1, 64
@@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX64-NEXT: rldic 3, 3, 2, 30
; AIX64-NEXT: ld 4, 0(1)
; AIX64-NEXT: li 5, -64
-; AIX64-NEXT: addi 3, 3, 15
-; AIX64-NEXT: mr 31, 1
-; AIX64-NEXT: rldicl 3, 3, 60, 4
-; AIX64-NEXT: rldicl 3, 3, 4, 29
; AIX64-NEXT: neg 3, 3
+; AIX64-NEXT: mr 31, 1
; AIX64-NEXT: and 5, 3, 5
; AIX64-NEXT: stdux 4, 1, 5
; AIX64-NEXT: addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index d1129b1825aee..8653ca997c4ed 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 {
; CHECK-LE-NEXT: li r4, -2048
; CHECK-LE-NEXT: li r6, -4096
; CHECK-LE-NEXT: mr r31, r1
-; CHECK-LE-NEXT: addi r3, r3, 15
-; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
-; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index b887bbb8c99f8..15d0cd5e9baee 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r31, -8
; CHECK-LE-NEXT: .cfi_offset r30, -16
-; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: lis r5, 1
; CHECK-LE-NEXT: mr r31, r1
-; CHECK-LE-NEXT: li r6, 1
; CHECK-LE-NEXT: sldi r4, r4, 2
-; CHECK-LE-NEXT: addi r3, r3, 15
+; CHECK-LE-NEXT: li r6, 1
+; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: ori r5, r5, 0
-; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
; CHECK-LE-NEXT: add r5, r31, r5
-; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: stwx r6, r5, r4
+; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: li r4, -32768
; CHECK-LE-NEXT: li r6, -4096
-; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
; CHECK-LE-NEXT: mr r5, r4
@@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r31, -8
; CHECK-BE-NEXT: .cfi_offset r30, -16
-; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: lis r5, 1
-; CHECK-BE-NEXT: addi r3, r3, 15
; CHECK-BE-NEXT: mr r31, r1
; CHECK-BE-NEXT: ori r5, r5, 0
-; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
; CHECK-BE-NEXT: add r5, r31, r5
; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: li r6, 1
-; CHECK-BE-NEXT: rldicl r3, r3, 4, 31
+; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: stwx r6, r5, r4
; CHECK-BE-NEXT: neg r7, r3
; CHECK-BE-NEXT: li r4, -32768
@@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-32-NEXT: lis r4, 1
; CHECK-32-NEXT: mr r31, r1
; CHECK-32-NEXT: ori r4, r4, 0
-; CHECK-32-NEXT: addi r3, r3, 15
; CHECK-32-NEXT: add r4, r31, r4
; CHECK-32-NEXT: li r5, 1
; CHECK-32-NEXT: slwi r6, r6, 2
-; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
; CHECK-32-NEXT: neg r7, r3
; CHECK-32-NEXT: stwx r5, r4, r6
; CHECK-32-NEXT: li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..07daca9c7851e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-NEXT: .cfi_def_cfa s0, 0
; RV64I-NEXT: andi sp, sp, -64
; RV64I-NEXT: mv s1, sp
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -64
; RV64I-NEXT: lui a2, 1
@@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV32I-NEXT: .cfi_def_cfa s0, 0
; RV32I-NEXT: andi sp, sp, -64
; RV32I-NEXT: mv s1, sp
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -64
; RV32I-NEXT: lui a1, 1
@@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-NEXT: srli a2, sp, 13
; RV64I-NEXT: slli sp, a2, 13
; RV64I-NEXT: mv s1, sp
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: lui a2, 1048574
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
+; RV64I-NEXT: lui a2, 1048574
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: lui a2, 1
; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV32I-NEXT: srli a1, sp, 13
; RV32I-NEXT: slli sp, a1, 13
; RV32I-NEXT: mv s1, sp
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: lui a1, 1048574
-; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
+; RV32I-NEXT: lui a1, 1048574
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index b1c0755c36ec1..70c082026bda8 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,8 +642,6 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sw a2, 0(a1)
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -2048
; RV64I-NEXT: lui a1, 1
@@ -698,11 +696,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV32I-NEXT: add a2, s1, a2
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: li a2, 1
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: andi a0, a0, -16
-; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -2048
+; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB11_3: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index a3dcc3779f4e6..3c469ee2f3a59 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
-; CHECK32-NEXT: add %sp, 80, %i1
+; CHECK32-NEXT: add %sp, 84, %i1
; CHECK32-NEXT: and %i1, -64, %o0
; CHECK32-NEXT: add %o0, -96, %sp
; CHECK32-NEXT: add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK64-LABEL: variable_alloca_with_overalignment:
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
-; CHECK64-NEXT: add %sp, 2159, %i1
+; CHECK64-NEXT: add %sp, 2171, %i1
; CHECK64-NEXT: and %i1, -64, %o0
; CHECK64-NEXT: add %o0, -2175, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
@@ -52,8 +52,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment_2:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
-; CHECK32-NEXT: add %i0, 7, %i0
-; CHECK32-NEXT: and %i0, -8, %i0
; CHECK32-NEXT: sub %sp, %i0, %i0
; CHECK32-NEXT: add %i0, 88, %i0
; CHECK32-NEXT: and %i0, -64, %o1
@@ -67,14 +65,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
-; CHECK64-NEXT: add %i0, 15, %i0
-; CHECK64-NEXT: sethi 4194303, %i1
-; CHECK64-NEXT: or %i1, 1008, %i1
-; CHECK64-NEXT: sethi 0, %i2
-; CHECK64-NEXT: or %i2, 1, %i2
-; CHECK64-NEXT: sllx %i2, 32, %i2
-; CHECK64-NEXT: or %i2, %i1, %i1
-; CHECK64-NEXT: and %i0, %i1, %i0
; CHECK64-NEXT: sub %sp, %i0, %i0
; CHECK64-NEXT: add %i0, 2175, %i0
; CHECK64-NEXT: and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index 18bb052b47d97..fffec42c2bbb9 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
; CHECK32: ! %bb.0: ! %entry
; CHECK32-NEXT: save %sp, -96, %sp
; CHECK32-NEXT: ld [%fp+92], %o0
-; CHECK32-NEXT: add %sp, 80, %i0
+; CHECK32-NEXT: add %sp, 84, %i0
; CHECK32-NEXT: and %i0, -64, %o1
; CHECK32-NEXT: call stack_realign_helper
; CHECK32-NEXT: add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/132064
More information about the llvm-commits mailing list