[llvm] [CodeGen] Avoid aligning alloca size. (PR #132064)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 02:20:12 PDT 2025
https://github.com/jcogan-nv updated https://github.com/llvm/llvm-project/pull/132064
>From 783982893eef1b9b417ad1565c0569c2da102f3e Mon Sep 17 00:00:00 2001
From: Jonathan Cogan <jcogan at nvidia.com>
Date: Wed, 19 Mar 2025 16:46:46 +0000
Subject: [PATCH 1/2] [CodeGen] Avoid aligning alloca size.
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 44 +++++++++++++------
.../SelectionDAG/SelectionDAGBuilder.cpp | 43 +++++++++++-------
.../AArch64/GlobalISel/dynamic-alloca.ll | 6 +--
.../CodeGen/AArch64/sme-framelower-use-bp.ll | 6 +--
.../CodeGen/AArch64/stack-probing-dynamic.ll | 14 ++----
llvm/test/CodeGen/AArch64/sve-alloca.ll | 4 +-
.../PowerPC/aix-framepointer-save-restore.ll | 9 +---
llvm/test/CodeGen/PowerPC/pr46759.ll | 3 --
.../CodeGen/PowerPC/stack-clash-prologue.ll | 16 ++-----
.../RISCV/rvv/stack-probing-dynamic.ll | 12 +----
.../CodeGen/RISCV/stack-clash-prologue.ll | 6 +--
llvm/test/CodeGen/SPARC/alloca-align.ll | 14 +-----
llvm/test/CodeGen/SPARC/stack-align.ll | 4 +-
llvm/test/CodeGen/SystemZ/alloca-03.ll | 2 +-
llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll | 9 ++--
.../CodeGen/VE/Scalar/stackframe_align.ll | 14 +++---
.../test/CodeGen/VE/Scalar/stackframe_call.ll | 8 ----
.../CodeGen/VE/Scalar/stackframe_nocall.ll | 8 ----
llvm/test/CodeGen/X86/clobber_base_ptr.ll | 4 --
llvm/test/CodeGen/X86/pr50782.ll | 3 +-
.../stack-clash-small-alloc-medium-align.ll | 3 +-
.../CodeGen/X86/win64_alloca_dynalloca.ll | 18 +++-----
22 files changed, 98 insertions(+), 152 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b85239ebf08cb..d55096af8836c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U,
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
- auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
- auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
- MachineInstr::NoUWrap);
- auto AlignCst =
- MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
- auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
-
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ Align StackAlign = TFI->getStackAlign();
Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
- if (Alignment <= StackAlign)
+
+ // If the alloca's alignment does not exceed the stack alignment, ignore the
+ // alloca's alignment. We will align the size of the alloca to the stack
+ // alignment, which will guarantee that the alloca's alignment is satisfied.
+ bool IsUnderAligned = Alignment <= StackAlign;
+ if (IsUnderAligned)
Alignment = Align(1);
- MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+
+ // If the stack grows up, adding the alloca's size to SP without padding may
+ // leave SP not aligned (to the stack alignment) after the alloca because we
+ // align SP (to the stack align or alloca align) *before* adding the alloca
+ // size. On the other hand, if the stack grows down, we will align SP *after*
+ // decrementing it, so there is no need to pad the size.
+ if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+ IsUnderAligned) {
+ // Round the size of the allocation up to the stack alignment size
+ // by adding SA-1 to the size. This doesn't overflow because we're
+ // computing an address inside an alloca.
+ auto SAMinusOne =
+ MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
+ auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+ MachineInstr::NoUWrap);
+ auto AlignCst =
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
+ auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
+
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+ } else {
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment);
+ }
MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
assert(MF->getFrameInfo().hasVarSizedObjects());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 14bb1d943d2d6..fe1a8bab30e54 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
}
- // Handle alignment. If the requested alignment is less than or equal to
- // the stack alignment, ignore it. If the size is greater than or equal to
- // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
- if (*Alignment <= StackAlign)
+ // Handle alignment. If the requested alignment is less than or equal to the
+ // stack alignment, ignore it since we will align the size. If the size is
+ // greater than or equal to the stack alignment, we note this in the
+ // DYNAMIC_STACKALLOC node.
+ const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
+ Align StackAlign = TFI->getStackAlign();
+ bool IsUnderAligned = *Alignment <= StackAlign;
+ if (IsUnderAligned)
Alignment = std::nullopt;
- const uint64_t StackAlignMask = StackAlign.value() - 1U;
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(StackAlignMask, dl, IntPtr),
- SDNodeFlags::NoUnsignedWrap);
-
- // Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
- DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+ // If the stack grows up, adding the alloca's size to SP without padding may
+ // leave SP not aligned (to the stack alignment) after the alloca because we
+ // align SP (to the stack align or alloca align) *before* adding the alloca
+ // size. On the other hand, if the stack grows down, we will align SP *after*
+ // decrementing it, so there is no need to align the size.
+ if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+ IsUnderAligned) {
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
+ // Round the size of the allocation up to the stack alignment size
+ // by add SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlignMask, dl, IntPtr),
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+ }
SDValue Ops[] = {
getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
index 88eaa1382d1d6..0f74b51262339 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
@@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) {
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
- ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32
+ ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32
; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0)
; CHECK: RET_ReallyLR implicit $x0
%addr = alloca i8, i32 %numelts, align 32
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index f49bb910b5bd1..85c6b2a2fb854 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,11 +160,7 @@ define void @quux() #1 {
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: addvl x9, x8, #2
-; CHECK-NEXT: mov w0, w9
-; CHECK-NEXT: // implicit-def: $x9
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: and x9, x9, #0x7f0
+; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index e7687f0d3994b..b32eb210ce0e7 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub x9, sp, #32
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-DAG: str xzr, [sp]
-; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT: str xzr, [sp]
; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x9
+; CHECK-DAG: sub x8, x8, x0
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-DAG: ldr xzr, [sp]
-; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT: ldr xzr, [sp]
; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x9
+; CHECK-DAG: sub x8, x8, x0
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 2520095cce62e..3d3c5da483015 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,10 +54,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: add x9, x9, #15
-; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT: cnth x9, all, mul #4
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 44281bcc3647d..8895a9a920569 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX32-NEXT: slwi 3, 3, 2
; AIX32-NEXT: lwz 4, 0(1)
; AIX32-NEXT: li 5, -64
-; AIX32-NEXT: addi 3, 3, 15
-; AIX32-NEXT: mr 31, 1
-; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27
; AIX32-NEXT: neg 3, 3
+; AIX32-NEXT: mr 31, 1
; AIX32-NEXT: and 5, 3, 5
; AIX32-NEXT: stwux 4, 1, 5
; AIX32-NEXT: addi 3, 1, 64
@@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX64-NEXT: rldic 3, 3, 2, 30
; AIX64-NEXT: ld 4, 0(1)
; AIX64-NEXT: li 5, -64
-; AIX64-NEXT: addi 3, 3, 15
-; AIX64-NEXT: mr 31, 1
-; AIX64-NEXT: rldicl 3, 3, 60, 4
-; AIX64-NEXT: rldicl 3, 3, 4, 29
; AIX64-NEXT: neg 3, 3
+; AIX64-NEXT: mr 31, 1
; AIX64-NEXT: and 5, 3, 5
; AIX64-NEXT: stdux 4, 1, 5
; AIX64-NEXT: addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index d1129b1825aee..8653ca997c4ed 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 {
; CHECK-LE-NEXT: li r4, -2048
; CHECK-LE-NEXT: li r6, -4096
; CHECK-LE-NEXT: mr r31, r1
-; CHECK-LE-NEXT: addi r3, r3, 15
-; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
-; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index b887bbb8c99f8..15d0cd5e9baee 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r31, -8
; CHECK-LE-NEXT: .cfi_offset r30, -16
-; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: lis r5, 1
; CHECK-LE-NEXT: mr r31, r1
-; CHECK-LE-NEXT: li r6, 1
; CHECK-LE-NEXT: sldi r4, r4, 2
-; CHECK-LE-NEXT: addi r3, r3, 15
+; CHECK-LE-NEXT: li r6, 1
+; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: ori r5, r5, 0
-; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
; CHECK-LE-NEXT: add r5, r31, r5
-; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: stwx r6, r5, r4
+; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: li r4, -32768
; CHECK-LE-NEXT: li r6, -4096
-; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
; CHECK-LE-NEXT: mr r5, r4
@@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r31, -8
; CHECK-BE-NEXT: .cfi_offset r30, -16
-; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: lis r5, 1
-; CHECK-BE-NEXT: addi r3, r3, 15
; CHECK-BE-NEXT: mr r31, r1
; CHECK-BE-NEXT: ori r5, r5, 0
-; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
; CHECK-BE-NEXT: add r5, r31, r5
; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: li r6, 1
-; CHECK-BE-NEXT: rldicl r3, r3, 4, 31
+; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: stwx r6, r5, r4
; CHECK-BE-NEXT: neg r7, r3
; CHECK-BE-NEXT: li r4, -32768
@@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-32-NEXT: lis r4, 1
; CHECK-32-NEXT: mr r31, r1
; CHECK-32-NEXT: ori r4, r4, 0
-; CHECK-32-NEXT: addi r3, r3, 15
; CHECK-32-NEXT: add r4, r31, r4
; CHECK-32-NEXT: li r5, 1
; CHECK-32-NEXT: slwi r6, r6, 2
-; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
; CHECK-32-NEXT: neg r7, r3
; CHECK-32-NEXT: stwx r5, r4, r6
; CHECK-32-NEXT: li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..07daca9c7851e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-NEXT: .cfi_def_cfa s0, 0
; RV64I-NEXT: andi sp, sp, -64
; RV64I-NEXT: mv s1, sp
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -64
; RV64I-NEXT: lui a2, 1
@@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV32I-NEXT: .cfi_def_cfa s0, 0
; RV32I-NEXT: andi sp, sp, -64
; RV32I-NEXT: mv s1, sp
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -64
; RV32I-NEXT: lui a1, 1
@@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-NEXT: srli a2, sp, 13
; RV64I-NEXT: slli sp, a2, 13
; RV64I-NEXT: mv s1, sp
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: lui a2, 1048574
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
+; RV64I-NEXT: lui a2, 1048574
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: lui a2, 1
; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV32I-NEXT: srli a1, sp, 13
; RV32I-NEXT: slli sp, a1, 13
; RV32I-NEXT: mv s1, sp
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: lui a1, 1048574
-; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
+; RV32I-NEXT: lui a1, 1048574
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index b1c0755c36ec1..70c082026bda8 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,8 +642,6 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sw a2, 0(a1)
-; RV64I-NEXT: addi a0, a0, 15
-; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -2048
; RV64I-NEXT: lui a1, 1
@@ -698,11 +696,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV32I-NEXT: add a2, s1, a2
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: li a2, 1
-; RV32I-NEXT: addi a0, a0, 15
-; RV32I-NEXT: andi a0, a0, -16
-; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -2048
+; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB11_3: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index a3dcc3779f4e6..3c469ee2f3a59 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
-; CHECK32-NEXT: add %sp, 80, %i1
+; CHECK32-NEXT: add %sp, 84, %i1
; CHECK32-NEXT: and %i1, -64, %o0
; CHECK32-NEXT: add %o0, -96, %sp
; CHECK32-NEXT: add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK64-LABEL: variable_alloca_with_overalignment:
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
-; CHECK64-NEXT: add %sp, 2159, %i1
+; CHECK64-NEXT: add %sp, 2171, %i1
; CHECK64-NEXT: and %i1, -64, %o0
; CHECK64-NEXT: add %o0, -2175, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
@@ -52,8 +52,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment_2:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
-; CHECK32-NEXT: add %i0, 7, %i0
-; CHECK32-NEXT: and %i0, -8, %i0
; CHECK32-NEXT: sub %sp, %i0, %i0
; CHECK32-NEXT: add %i0, 88, %i0
; CHECK32-NEXT: and %i0, -64, %o1
@@ -67,14 +65,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
-; CHECK64-NEXT: add %i0, 15, %i0
-; CHECK64-NEXT: sethi 4194303, %i1
-; CHECK64-NEXT: or %i1, 1008, %i1
-; CHECK64-NEXT: sethi 0, %i2
-; CHECK64-NEXT: or %i2, 1, %i2
-; CHECK64-NEXT: sllx %i2, 32, %i2
-; CHECK64-NEXT: or %i2, %i1, %i1
-; CHECK64-NEXT: and %i0, %i1, %i0
; CHECK64-NEXT: sub %sp, %i0, %i0
; CHECK64-NEXT: add %i0, 2175, %i0
; CHECK64-NEXT: and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index 18bb052b47d97..fffec42c2bbb9 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
; CHECK32: ! %bb.0: ! %entry
; CHECK32-NEXT: save %sp, -96, %sp
; CHECK32-NEXT: ld [%fp+92], %o0
-; CHECK32-NEXT: add %sp, 80, %i0
+; CHECK32-NEXT: add %sp, 84, %i0
; CHECK32-NEXT: and %i0, -64, %o1
; CHECK32-NEXT: call stack_realign_helper
; CHECK32-NEXT: add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
; CHECK64-LABEL: stack_realign:
; CHECK64: ! %bb.0: ! %entry
; CHECK64-NEXT: save %sp, -128, %sp
-; CHECK64-NEXT: add %sp, 2159, %i0
+; CHECK64-NEXT: add %sp, 2171, %i0
; CHECK64-NEXT: and %i0, -64, %o1
; CHECK64-NEXT: add %o1, -2175, %sp
; CHECK64-NEXT: add %sp, -48, %sp
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index e331bfbfb8ab3..3ad687df50295 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -114,7 +114,7 @@ define void @f5() {
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: aghi %r1, -128
+; CHECK-NEXT: aghi %r1, -124
; CHECK-NEXT: la %r2, 280(%r1)
; CHECK-NEXT: nill %r2, 65408
; CHECK-NEXT: lgr %r15, %r1
diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
index a1002c540c14f..99c4a9fcdd8b1 100644
--- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
+++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
@@ -7,12 +7,11 @@ define void @test(i64 %n) {
; CHECK-LABEL: test:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, %s0
-; CHECK-NEXT: lea %s0, 15(, %s0)
-; CHECK-NEXT: and %s0, -16, %s0
-; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
-; CHECK-NEXT: and %s1, %s1, (32)0
-; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
+; CHECK-NEXT: lea %s0, __ve_grow_stack_align@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s0)
; CHECK-NEXT: or %s1, -32, (0)1
+; CHECK-NEXT: or %s0, 0, %s2
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: lea %s0, 31(, %s0)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
index d90c0bcf9f837..4121e9507bad3 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
@@ -407,10 +407,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: ld1b.zx %s0, (, %s0)
-; CHECK-NEXT: st1b %s0, 272(, %s17)
-; CHECK-NEXT: lea %s0, 15(, %s1)
-; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: ld1b.zx %s2, (, %s0)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: st1b %s2, 272(, %s17)
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -447,10 +446,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
; CHECKFP-NEXT: monc
; CHECKFP-NEXT: or %s0, 0, %s62
; CHECKFP-NEXT: .LBB6_2:
-; CHECKFP-NEXT: ld1b.zx %s0, (, %s0)
-; CHECKFP-NEXT: st1b %s0, 272(, %s17)
-; CHECKFP-NEXT: lea %s0, 15(, %s1)
-; CHECKFP-NEXT: and %s0, -16, %s0
+; CHECKFP-NEXT: ld1b.zx %s2, (, %s0)
+; CHECKFP-NEXT: or %s0, 0, %s1
+; CHECKFP-NEXT: st1b %s2, 272(, %s17)
; CHECKFP-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECKFP-NEXT: and %s1, %s1, (32)0
; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
index 3a3b1ba1544c4..02a1298141265 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
@@ -180,8 +180,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: lea %s0, 15(, %s0)
-; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -227,8 +225,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT: lea %s0, 15(, %s0)
-; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
@@ -447,8 +443,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB5_2:
-; CHECK-NEXT: lea %s0, 15(, %s0)
-; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -499,8 +493,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT: lea %s0, 15(, %s0)
-; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
index f9308a172ad05..20affad0fc7bd 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
@@ -103,8 +103,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: or %s2, 0, %s1
-; CHECK-NEXT: lea %s0, 15(, %s0)
-; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -149,8 +147,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT: lea %s0, 15(, %s0)
-; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
@@ -300,8 +296,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: or %s2, 0, %s1
-; CHECK-NEXT: lea %s0, 15(, %s0)
-; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -346,8 +340,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT: lea %s0, 15(, %s0)
-; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 2c39560f02d16..2bd1c69bc521d 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -25,8 +25,6 @@ define i32 @clober_bp() {
; CHECK-NEXT: .cfi_offset %edi, -12
; CHECK-NEXT: movl $4, 12(%esi)
; CHECK-NEXT: movl 12(%esi), %eax
-; CHECK-NEXT: addl $3, %eax
-; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
@@ -78,8 +76,6 @@ define i32 @clobber_bpfp() {
; CHECK-NEXT: .cfi_offset %edi, -12
; CHECK-NEXT: movl $4, 12(%esi)
; CHECK-NEXT: movl 12(%esi), %eax
-; CHECK-NEXT: addl $3, %eax
-; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll
index 591a33446d4e3..0cdbd5721e6b1 100644
--- a/llvm/test/CodeGen/X86/pr50782.ll
+++ b/llvm/test/CodeGen/X86/pr50782.ll
@@ -25,8 +25,7 @@ define void @h(float %i) {
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: movl _a, %ecx
-; CHECK-NEXT: leal 3(%ecx), %eax
-; CHECK-NEXT: andl $-4, %eax
+; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
index ccf7e1d56da90..dc2503ecece91 100644
--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
@@ -99,8 +99,7 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
; CHECK-NEXT: movl $1, (%rbx,%rdi,4)
; CHECK-NEXT: movl (%rbx), %ecx
; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: leaq 15(,%rcx,4), %rcx
-; CHECK-NEXT: andq $-16, %rcx
+; CHECK-NEXT: shlq $2, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: cmpq %rsp, %rax
; CHECK-NEXT: jge .LBB3_3
diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
index 241188b8cc3d5..d636896467b00 100644
--- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -95,28 +95,24 @@ entry:
%buf1 = alloca i8, i64 %n, align 128
-; M64: leaq 15(%{{.*}}), %rax
-; M64: andq $-16, %rax
+; M64: movq %rcx, %rax
; M64: callq ___chkstk_ms
; M64: subq %rax, %rsp
; M64: movq %rsp, [[R2:%r.*]]
; M64: andq $-128, [[R2]]
; M64: movq [[R2]], %rsp
-; W64: leaq 15(%{{.*}}), %rax
-; W64: andq $-16, %rax
+; W64: movq %rcx, %rax
; W64: callq __chkstk
; W64: subq %rax, %rsp
; W64: movq %rsp, [[R2:%r.*]]
; W64: andq $-128, [[R2]]
; W64: movq [[R2]], %rsp
-; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
-; EFI: andq $-16, [[R1]]
-; EFI: movq %rsp, [[R64:%r.*]]
-; EFI: subq [[R1]], [[R64]]
-; EFI: andq $-128, [[R64]]
-; EFI: movq [[R64]], %rsp
+; EFI: movq %rsp, %rax
+; EFI: subq %rcx, %rax
+; EFI: andq $-128, %rax
+; EFI: movq %rax, %rsp
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind
@@ -129,7 +125,7 @@ entry:
; W64: callq bar
; EFI: subq $48, %rsp
-; EFI: movq [[R64]], 32(%rsp)
+; EFI: movq %rax, 32(%rsp)
; EFI: callq _bar
ret i64 %r
>From 5ca41197b3651b9d30bcd62affc2fadca753abb7 Mon Sep 17 00:00:00 2001
From: Jonathan Cogan <jcogan at nvidia.com>
Date: Mon, 24 Mar 2025 09:18:03 +0000
Subject: [PATCH 2/2] Revert changes to SelectionDAG.
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 43 +++++++------------
.../CodeGen/AArch64/sme-framelower-use-bp.ll | 6 ++-
.../CodeGen/AArch64/stack-probing-dynamic.ll | 40 ++++++++++++-----
llvm/test/CodeGen/AArch64/sve-alloca.ll | 4 +-
.../PowerPC/aix-framepointer-save-restore.ll | 9 +++-
llvm/test/CodeGen/PowerPC/pr46759.ll | 3 ++
.../CodeGen/PowerPC/stack-clash-prologue.ll | 16 +++++--
.../RISCV/rvv/stack-probing-dynamic.ll | 12 +++++-
.../CodeGen/RISCV/stack-clash-prologue.ll | 6 ++-
llvm/test/CodeGen/SPARC/alloca-align.ll | 14 +++++-
llvm/test/CodeGen/SPARC/stack-align.ll | 4 +-
llvm/test/CodeGen/SystemZ/alloca-03.ll | 2 +-
llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll | 9 ++--
.../CodeGen/VE/Scalar/stackframe_align.ll | 14 +++---
.../test/CodeGen/VE/Scalar/stackframe_call.ll | 8 ++++
.../CodeGen/VE/Scalar/stackframe_nocall.ll | 8 ++++
llvm/test/CodeGen/X86/clobber_base_ptr.ll | 4 ++
llvm/test/CodeGen/X86/pr50782.ll | 3 +-
.../stack-clash-small-alloc-medium-align.ll | 3 +-
.../CodeGen/X86/win64_alloca_dynalloca.ll | 18 +++++---
20 files changed, 154 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe1a8bab30e54..14bb1d943d2d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,35 +4447,24 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
}
- // Handle alignment. If the requested alignment is less than or equal to the
- // stack alignment, ignore it since we will align the size. If the size is
- // greater than or equal to the stack alignment, we note this in the
- // DYNAMIC_STACKALLOC node.
- const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
- Align StackAlign = TFI->getStackAlign();
- bool IsUnderAligned = *Alignment <= StackAlign;
- if (IsUnderAligned)
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If the size is greater than or equal to
+ // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+ Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
+ if (*Alignment <= StackAlign)
Alignment = std::nullopt;
- // If the stack grows up, adding the alloca's size to SP without padding may
- // leave SP not aligned (to the stack alignment) after the alloca because we
- // align SP (to the stack align or alloca align) *before* adding the alloca
- // size. On the other hand, if the stack grows down, we will align SP *after*
- // decrementing it, so there is no need to align the size.
- if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
- IsUnderAligned) {
- const uint64_t StackAlignMask = StackAlign.value() - 1U;
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(StackAlignMask, dl, IntPtr),
- SDNodeFlags::NoUnsignedWrap);
-
- // Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
- DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
- }
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
+ // Round the size of the allocation up to the stack alignment size
+ // by add SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlignMask, dl, IntPtr),
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
SDValue Ops[] = {
getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index 85c6b2a2fb854..f49bb910b5bd1 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,7 +160,11 @@ define void @quux() #1 {
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: rdvl x9, #2
+; CHECK-NEXT: addvl x9, x8, #2
+; CHECK-NEXT: mov w0, w9
+; CHECK-NEXT: // implicit-def: $x9
+; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: and x9, x9, #0x7f0
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index b32eb210ce0e7..ee74469c0a76a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s
+; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Dynamically-sized allocation, needs a loop which can handle any size at
; runtime. The final iteration of the loop will temporarily put SP below the
@@ -107,10 +107,20 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub x9, sp, #32
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
-; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x0
+;
+; CHECK-SD-NEXT: add x9, x0, #15
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-DAG: str xzr, [sp]
+; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0
+; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-SD-DAG: mov x19, sp
+; CHECK-SD-DAG: sub x8, x8, x9
+;
+; CHECK-GI-NEXT: mov x8, sp
+; CHECK-GI-NEXT: str xzr, [sp]
+; CHECK-GI-DAG: mov x19, sp
+; CHECK-GI-DAG: sub x8, x8, x0
+;
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -164,10 +174,20 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: ldr xzr, [sp]
-; CHECK-DAG: mov x19, sp
-; CHECK-DAG: sub x8, x8, x0
+;
+; CHECK-SD-NEXT: add x9, x0, #15
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-DAG: ldr xzr, [sp]
+; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0
+; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-SD-DAG: mov x19, sp
+; CHECK-SD-DAG: sub x8, x8, x9
+;
+; CHECK-GI-NEXT: mov x8, sp
+; CHECK-GI-NEXT: ldr xzr, [sp]
+; CHECK-GI-DAG: mov x19, sp
+; CHECK-GI-DAG: sub x8, x8, x0
+;
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 3d3c5da483015..2520095cce62e 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,8 +54,10 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
+; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: cnth x9, all, mul #4
+; CHECK-NEXT: add x9, x9, #15
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 8895a9a920569..44281bcc3647d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,8 +82,10 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX32-NEXT: slwi 3, 3, 2
; AIX32-NEXT: lwz 4, 0(1)
; AIX32-NEXT: li 5, -64
-; AIX32-NEXT: neg 3, 3
+; AIX32-NEXT: addi 3, 3, 15
; AIX32-NEXT: mr 31, 1
+; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27
+; AIX32-NEXT: neg 3, 3
; AIX32-NEXT: and 5, 3, 5
; AIX32-NEXT: stwux 4, 1, 5
; AIX32-NEXT: addi 3, 1, 64
@@ -109,8 +111,11 @@ define dso_local void @frameptr_realigned(i32 %n) {
; AIX64-NEXT: rldic 3, 3, 2, 30
; AIX64-NEXT: ld 4, 0(1)
; AIX64-NEXT: li 5, -64
-; AIX64-NEXT: neg 3, 3
+; AIX64-NEXT: addi 3, 3, 15
; AIX64-NEXT: mr 31, 1
+; AIX64-NEXT: rldicl 3, 3, 60, 4
+; AIX64-NEXT: rldicl 3, 3, 4, 29
+; AIX64-NEXT: neg 3, 3
; AIX64-NEXT: and 5, 3, 5
; AIX64-NEXT: stdux 4, 1, 5
; AIX64-NEXT: addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 8653ca997c4ed..d1129b1825aee 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,6 +33,9 @@ define void @foo(i32 %vla_size) #0 {
; CHECK-LE-NEXT: li r4, -2048
; CHECK-LE-NEXT: li r6, -4096
; CHECK-LE-NEXT: mr r31, r1
+; CHECK-LE-NEXT: addi r3, r3, 15
+; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
+; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index 15d0cd5e9baee..b887bbb8c99f8 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,17 +835,20 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r31, -8
; CHECK-LE-NEXT: .cfi_offset r30, -16
+; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: lis r5, 1
; CHECK-LE-NEXT: mr r31, r1
-; CHECK-LE-NEXT: sldi r4, r4, 2
; CHECK-LE-NEXT: li r6, 1
-; CHECK-LE-NEXT: clrldi r3, r3, 32
+; CHECK-LE-NEXT: sldi r4, r4, 2
+; CHECK-LE-NEXT: addi r3, r3, 15
; CHECK-LE-NEXT: ori r5, r5, 0
+; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
; CHECK-LE-NEXT: add r5, r31, r5
+; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: stwx r6, r5, r4
-; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: li r4, -32768
; CHECK-LE-NEXT: li r6, -4096
+; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r4, r5, r4
; CHECK-LE-NEXT: mr r5, r4
@@ -893,13 +896,16 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r31, -8
; CHECK-BE-NEXT: .cfi_offset r30, -16
+; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: lis r5, 1
+; CHECK-BE-NEXT: addi r3, r3, 15
; CHECK-BE-NEXT: mr r31, r1
; CHECK-BE-NEXT: ori r5, r5, 0
+; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
; CHECK-BE-NEXT: add r5, r31, r5
; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: li r6, 1
-; CHECK-BE-NEXT: clrldi r3, r3, 32
+; CHECK-BE-NEXT: rldicl r3, r3, 4, 31
; CHECK-BE-NEXT: stwx r6, r5, r4
; CHECK-BE-NEXT: neg r7, r3
; CHECK-BE-NEXT: li r4, -32768
@@ -958,9 +964,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-32-NEXT: lis r4, 1
; CHECK-32-NEXT: mr r31, r1
; CHECK-32-NEXT: ori r4, r4, 0
+; CHECK-32-NEXT: addi r3, r3, 15
; CHECK-32-NEXT: add r4, r31, r4
; CHECK-32-NEXT: li r5, 1
; CHECK-32-NEXT: slwi r6, r6, 2
+; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
; CHECK-32-NEXT: neg r7, r3
; CHECK-32-NEXT: stwx r5, r4, r6
; CHECK-32-NEXT: li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index 07daca9c7851e..c3c1643e6de01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,6 +180,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-NEXT: .cfi_def_cfa s0, 0
; RV64I-NEXT: andi sp, sp, -64
; RV64I-NEXT: mv s1, sp
+; RV64I-NEXT: addi a0, a0, 15
+; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -64
; RV64I-NEXT: lui a2, 1
@@ -217,6 +219,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV32I-NEXT: .cfi_def_cfa s0, 0
; RV32I-NEXT: andi sp, sp, -64
; RV32I-NEXT: mv s1, sp
+; RV32I-NEXT: addi a0, a0, 15
+; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -64
; RV32I-NEXT: lui a1, 1
@@ -274,8 +278,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-NEXT: srli a2, sp, 13
; RV64I-NEXT: slli sp, a2, 13
; RV64I-NEXT: mv s1, sp
-; RV64I-NEXT: sub a0, sp, a0
+; RV64I-NEXT: addi a0, a0, 15
; RV64I-NEXT: lui a2, 1048574
+; RV64I-NEXT: andi a0, a0, -16
+; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: lui a2, 1
; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -323,8 +329,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV32I-NEXT: srli a1, sp, 13
; RV32I-NEXT: slli sp, a1, 13
; RV32I-NEXT: mv s1, sp
-; RV32I-NEXT: sub a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 15
; RV32I-NEXT: lui a1, 1048574
+; RV32I-NEXT: andi a0, a0, -16
+; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index 70c082026bda8..b1c0755c36ec1 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,6 +642,8 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sw a2, 0(a1)
+; RV64I-NEXT: addi a0, a0, 15
+; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: andi a0, a0, -2048
; RV64I-NEXT: lui a1, 1
@@ -696,9 +698,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
; RV32I-NEXT: add a2, s1, a2
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: li a2, 1
+; RV32I-NEXT: addi a0, a0, 15
+; RV32I-NEXT: andi a0, a0, -16
+; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: andi a0, a0, -2048
-; RV32I-NEXT: sw a2, 0(a1)
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB11_3: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index 3c469ee2f3a59..a3dcc3779f4e6 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
-; CHECK32-NEXT: add %sp, 84, %i1
+; CHECK32-NEXT: add %sp, 80, %i1
; CHECK32-NEXT: and %i1, -64, %o0
; CHECK32-NEXT: add %o0, -96, %sp
; CHECK32-NEXT: add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK64-LABEL: variable_alloca_with_overalignment:
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
-; CHECK64-NEXT: add %sp, 2171, %i1
+; CHECK64-NEXT: add %sp, 2159, %i1
; CHECK64-NEXT: and %i1, -64, %o0
; CHECK64-NEXT: add %o0, -2175, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
@@ -52,6 +52,8 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment_2:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
+; CHECK32-NEXT: add %i0, 7, %i0
+; CHECK32-NEXT: and %i0, -8, %i0
; CHECK32-NEXT: sub %sp, %i0, %i0
; CHECK32-NEXT: add %i0, 88, %i0
; CHECK32-NEXT: and %i0, -64, %o1
@@ -65,6 +67,14 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
+; CHECK64-NEXT: add %i0, 15, %i0
+; CHECK64-NEXT: sethi 4194303, %i1
+; CHECK64-NEXT: or %i1, 1008, %i1
+; CHECK64-NEXT: sethi 0, %i2
+; CHECK64-NEXT: or %i2, 1, %i2
+; CHECK64-NEXT: sllx %i2, 32, %i2
+; CHECK64-NEXT: or %i2, %i1, %i1
+; CHECK64-NEXT: and %i0, %i1, %i0
; CHECK64-NEXT: sub %sp, %i0, %i0
; CHECK64-NEXT: add %i0, 2175, %i0
; CHECK64-NEXT: and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index fffec42c2bbb9..18bb052b47d97 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
; CHECK32: ! %bb.0: ! %entry
; CHECK32-NEXT: save %sp, -96, %sp
; CHECK32-NEXT: ld [%fp+92], %o0
-; CHECK32-NEXT: add %sp, 84, %i0
+; CHECK32-NEXT: add %sp, 80, %i0
; CHECK32-NEXT: and %i0, -64, %o1
; CHECK32-NEXT: call stack_realign_helper
; CHECK32-NEXT: add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
; CHECK64-LABEL: stack_realign:
; CHECK64: ! %bb.0: ! %entry
; CHECK64-NEXT: save %sp, -128, %sp
-; CHECK64-NEXT: add %sp, 2171, %i0
+; CHECK64-NEXT: add %sp, 2159, %i0
; CHECK64-NEXT: and %i0, -64, %o1
; CHECK64-NEXT: add %o1, -2175, %sp
; CHECK64-NEXT: add %sp, -48, %sp
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index 3ad687df50295..e331bfbfb8ab3 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -114,7 +114,7 @@ define void @f5() {
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: aghi %r1, -124
+; CHECK-NEXT: aghi %r1, -128
; CHECK-NEXT: la %r2, 280(%r1)
; CHECK-NEXT: nill %r2, 65408
; CHECK-NEXT: lgr %r15, %r1
diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
index 99c4a9fcdd8b1..a1002c540c14f 100644
--- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
+++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
@@ -7,11 +7,12 @@ define void @test(i64 %n) {
; CHECK-LABEL: test:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, %s0
-; CHECK-NEXT: lea %s0, __ve_grow_stack_align@lo
-; CHECK-NEXT: and %s0, %s0, (32)0
-; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s0)
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
; CHECK-NEXT: or %s1, -32, (0)1
-; CHECK-NEXT: or %s0, 0, %s2
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: lea %s0, 31(, %s0)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
index 4121e9507bad3..d90c0bcf9f837 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
@@ -407,9 +407,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: ld1b.zx %s2, (, %s0)
-; CHECK-NEXT: or %s0, 0, %s1
-; CHECK-NEXT: st1b %s2, 272(, %s17)
+; CHECK-NEXT: ld1b.zx %s0, (, %s0)
+; CHECK-NEXT: st1b %s0, 272(, %s17)
+; CHECK-NEXT: lea %s0, 15(, %s1)
+; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -446,9 +447,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
; CHECKFP-NEXT: monc
; CHECKFP-NEXT: or %s0, 0, %s62
; CHECKFP-NEXT: .LBB6_2:
-; CHECKFP-NEXT: ld1b.zx %s2, (, %s0)
-; CHECKFP-NEXT: or %s0, 0, %s1
-; CHECKFP-NEXT: st1b %s2, 272(, %s17)
+; CHECKFP-NEXT: ld1b.zx %s0, (, %s0)
+; CHECKFP-NEXT: st1b %s0, 272(, %s17)
+; CHECKFP-NEXT: lea %s0, 15(, %s1)
+; CHECKFP-NEXT: and %s0, -16, %s0
; CHECKFP-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECKFP-NEXT: and %s1, %s1, (32)0
; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
index 02a1298141265..3a3b1ba1544c4 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
@@ -180,6 +180,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -225,6 +227,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT: lea %s0, 15(, %s0)
+; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
@@ -443,6 +447,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -493,6 +499,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT: lea %s0, 15(, %s0)
+; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
index 20affad0fc7bd..f9308a172ad05 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
@@ -103,6 +103,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: or %s2, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -147,6 +149,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT: lea %s0, 15(, %s0)
+; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
@@ -296,6 +300,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: or %s2, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -340,6 +346,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT: lea %s0, 15(, %s0)
+; PIC-NEXT: and %s0, -16, %s0
; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24)
; PIC-NEXT: and %s12, %s12, (32)0
; PIC-NEXT: sic %s16
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 2bd1c69bc521d..2c39560f02d16 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -25,6 +25,8 @@ define i32 @clober_bp() {
; CHECK-NEXT: .cfi_offset %edi, -12
; CHECK-NEXT: movl $4, 12(%esi)
; CHECK-NEXT: movl 12(%esi), %eax
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
@@ -76,6 +78,8 @@ define i32 @clobber_bpfp() {
; CHECK-NEXT: .cfi_offset %edi, -12
; CHECK-NEXT: movl $4, 12(%esi)
; CHECK-NEXT: movl 12(%esi), %eax
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll
index 0cdbd5721e6b1..591a33446d4e3 100644
--- a/llvm/test/CodeGen/X86/pr50782.ll
+++ b/llvm/test/CodeGen/X86/pr50782.ll
@@ -25,7 +25,8 @@ define void @h(float %i) {
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: movl _a, %ecx
-; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: leal 3(%ecx), %eax
+; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
index dc2503ecece91..ccf7e1d56da90 100644
--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
@@ -99,7 +99,8 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
; CHECK-NEXT: movl $1, (%rbx,%rdi,4)
; CHECK-NEXT: movl (%rbx), %ecx
; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: shlq $2, %rcx
+; CHECK-NEXT: leaq 15(,%rcx,4), %rcx
+; CHECK-NEXT: andq $-16, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: cmpq %rsp, %rax
; CHECK-NEXT: jge .LBB3_3
diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
index d636896467b00..241188b8cc3d5 100644
--- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -95,24 +95,28 @@ entry:
%buf1 = alloca i8, i64 %n, align 128
-; M64: movq %rcx, %rax
+; M64: leaq 15(%{{.*}}), %rax
+; M64: andq $-16, %rax
; M64: callq ___chkstk_ms
; M64: subq %rax, %rsp
; M64: movq %rsp, [[R2:%r.*]]
; M64: andq $-128, [[R2]]
; M64: movq [[R2]], %rsp
-; W64: movq %rcx, %rax
+; W64: leaq 15(%{{.*}}), %rax
+; W64: andq $-16, %rax
; W64: callq __chkstk
; W64: subq %rax, %rsp
; W64: movq %rsp, [[R2:%r.*]]
; W64: andq $-128, [[R2]]
; W64: movq [[R2]], %rsp
-; EFI: movq %rsp, %rax
-; EFI: subq %rcx, %rax
-; EFI: andq $-128, %rax
-; EFI: movq %rax, %rsp
+; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
+; EFI: andq $-16, [[R1]]
+; EFI: movq %rsp, [[R64:%r.*]]
+; EFI: subq [[R1]], [[R64]]
+; EFI: andq $-128, [[R64]]
+; EFI: movq [[R64]], %rsp
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind
@@ -125,7 +129,7 @@ entry:
; W64: callq bar
; EFI: subq $48, %rsp
-; EFI: movq %rax, 32(%rsp)
+; EFI: movq [[R64]], 32(%rsp)
; EFI: callq _bar
ret i64 %r
More information about the llvm-commits
mailing list