[llvm] [CodeGen] Avoid aligning alloca size. (PR #132064)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 02:20:12 PDT 2025


https://github.com/jcogan-nv updated https://github.com/llvm/llvm-project/pull/132064

>From 783982893eef1b9b417ad1565c0569c2da102f3e Mon Sep 17 00:00:00 2001
From: Jonathan Cogan <jcogan at nvidia.com>
Date: Wed, 19 Mar 2025 16:46:46 +0000
Subject: [PATCH 1/2] [CodeGen] Avoid aligning alloca size.

---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 44 +++++++++++++------
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 43 +++++++++++-------
 .../AArch64/GlobalISel/dynamic-alloca.ll      |  6 +--
 .../CodeGen/AArch64/sme-framelower-use-bp.ll  |  6 +--
 .../CodeGen/AArch64/stack-probing-dynamic.ll  | 14 ++----
 llvm/test/CodeGen/AArch64/sve-alloca.ll       |  4 +-
 .../PowerPC/aix-framepointer-save-restore.ll  |  9 +---
 llvm/test/CodeGen/PowerPC/pr46759.ll          |  3 --
 .../CodeGen/PowerPC/stack-clash-prologue.ll   | 16 ++-----
 .../RISCV/rvv/stack-probing-dynamic.ll        | 12 +----
 .../CodeGen/RISCV/stack-clash-prologue.ll     |  6 +--
 llvm/test/CodeGen/SPARC/alloca-align.ll       | 14 +-----
 llvm/test/CodeGen/SPARC/stack-align.ll        |  4 +-
 llvm/test/CodeGen/SystemZ/alloca-03.ll        |  2 +-
 llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll |  9 ++--
 .../CodeGen/VE/Scalar/stackframe_align.ll     | 14 +++---
 .../test/CodeGen/VE/Scalar/stackframe_call.ll |  8 ----
 .../CodeGen/VE/Scalar/stackframe_nocall.ll    |  8 ----
 llvm/test/CodeGen/X86/clobber_base_ptr.ll     |  4 --
 llvm/test/CodeGen/X86/pr50782.ll              |  3 +-
 .../stack-clash-small-alloc-medium-align.ll   |  3 +-
 .../CodeGen/X86/win64_alloca_dynalloca.ll     | 18 +++-----
 22 files changed, 98 insertions(+), 152 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b85239ebf08cb..d55096af8836c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U,
       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
 
-  // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size. This doesn't overflow because we're computing
-  // an address inside an alloca.
-  Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
-  auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
-  auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
-                                      MachineInstr::NoUWrap);
-  auto AlignCst =
-      MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
-  auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
-
+  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+  Align StackAlign = TFI->getStackAlign();
   Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
-  if (Alignment <= StackAlign)
+
+  // If the stack alignment is at least as strict as the alloca's alignment,
+  // ignore the alloca's alignment. We will align the size of the alloca to the
+  // stack alignment, which guarantees that the alloca's alignment is satisfied.
+  bool IsUnderAligned = Alignment <= StackAlign;
+  if (IsUnderAligned)
     Alignment = Align(1);
-  MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+
+  // If the stack grows up, adding the alloca's size to SP without padding may
+  // leave SP not aligned (to the stack alignment) after the alloca because we
+  // align SP (to the stack align or alloca align) *before* adding the alloca
+  // size. On the other hand, if the stack grows down, we will align SP *after*
+  // decrementing it, so there is no need to pad the size.
+  if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+      IsUnderAligned) {
+    // Round the size of the allocation up to the stack alignment size
+    // by adding SA-1 to the size. This doesn't overflow because we're computing
+    // an address inside an alloca.
+    auto SAMinusOne =
+        MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
+    auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+                                        MachineInstr::NoUWrap);
+    auto AlignCst =
+        MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
+    auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
+
+    MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+  } else {
+    MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment);
+  }
 
   MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
   assert(MF->getFrameInfo().hasVarSizedObjects());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 14bb1d943d2d6..fe1a8bab30e54 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
                             DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
   }
 
-  // Handle alignment.  If the requested alignment is less than or equal to
-  // the stack alignment, ignore it.  If the size is greater than or equal to
-  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
-  if (*Alignment <= StackAlign)
+  // Handle alignment. If the requested alignment is less than or equal to the
+  // stack alignment, ignore it since we will align the size. If the size is
+  // greater than or equal to the stack alignment, we note this in the
+  // DYNAMIC_STACKALLOC node.
+  const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
+  Align StackAlign = TFI->getStackAlign();
+  bool IsUnderAligned = *Alignment <= StackAlign;
+  if (IsUnderAligned)
     Alignment = std::nullopt;
 
-  const uint64_t StackAlignMask = StackAlign.value() - 1U;
-  // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size. This doesn't overflow because we're computing
-  // an address inside an alloca.
-  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
-                          DAG.getConstant(StackAlignMask, dl, IntPtr),
-                          SDNodeFlags::NoUnsignedWrap);
-
-  // Mask out the low bits for alignment purposes.
-  AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
-                          DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+  // If the stack grows up, adding the alloca's size to SP without padding may
+  // leave SP not aligned (to the stack alignment) after the alloca because we
+  // align SP (to the stack align or alloca align) *before* adding the alloca
+  // size. On the other hand, if the stack grows down, we will align SP *after*
+  // decrementing it, so there is no need to align the size.
+  if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+      IsUnderAligned) {
+    const uint64_t StackAlignMask = StackAlign.value() - 1U;
+    // Round the size of the allocation up to the stack alignment size
+    // by add SA-1 to the size. This doesn't overflow because we're computing
+    // an address inside an alloca.
+    AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+                            DAG.getConstant(StackAlignMask, dl, IntPtr),
+                            SDNodeFlags::NoUnsignedWrap);
+
+    // Mask out the low bits for alignment purposes.
+    AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+                            DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+  }
 
   SDValue Ops[] = {
       getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
index 88eaa1382d1d6..0f74b51262339 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
@@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) {
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
   ; CHECK:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
-  ; CHECK:   [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
-  ; CHECK:   [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32
+  ; CHECK:   [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32
   ; CHECK:   $x0 = COPY [[DYN_STACKALLOC]](p0)
   ; CHECK:   RET_ReallyLR implicit $x0
   %addr = alloca i8, i32 %numelts, align 32
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index f49bb910b5bd1..85c6b2a2fb854 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,11 +160,7 @@ define void @quux() #1 {
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    subs x9, x9, #16
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    addvl x9, x8, #2
-; CHECK-NEXT:    mov w0, w9
-; CHECK-NEXT:    // implicit-def: $x9
-; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    and x9, x9, #0x7f0
+; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    subs x10, x10, x9
 ; CHECK-NEXT:    and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index e7687f0d3994b..b32eb210ce0e7 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    sub x9, sp, #32
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT:    add x9, x0, #15
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-DAG:     str xzr, [sp]
-; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT:    str xzr, [sp]
 ; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x9
+; CHECK-DAG:     sub x8, x8, x0
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffffc0
 ; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
@@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    b .LBB3_1
 ; CHECK-NEXT:  .LBB3_3:
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    add x9, x0, #15
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-DAG:     ldr xzr, [sp]
-; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT:    ldr xzr, [sp]
 ; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x9
+; CHECK-DAG:     sub x8, x8, x0
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffe000
 ; CHECK-NEXT:  .LBB3_4: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 2520095cce62e..3d3c5da483015 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,10 +54,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    add x9, x9, #15
-; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT:    cnth x9, all, mul #4
 ; CHECK-NEXT:    sub x8, x8, x9
 ; CHECK-NEXT:    and x0, x8, #0xffffffffffffffe0
 ; CHECK-NEXT:    mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 44281bcc3647d..8895a9a920569 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX32-NEXT:    slwi 3, 3, 2
 ; AIX32-NEXT:    lwz 4, 0(1)
 ; AIX32-NEXT:    li 5, -64
-; AIX32-NEXT:    addi 3, 3, 15
-; AIX32-NEXT:    mr 31, 1
-; AIX32-NEXT:    rlwinm 3, 3, 0, 0, 27
 ; AIX32-NEXT:    neg 3, 3
+; AIX32-NEXT:    mr 31, 1
 ; AIX32-NEXT:    and 5, 3, 5
 ; AIX32-NEXT:    stwux 4, 1, 5
 ; AIX32-NEXT:    addi 3, 1, 64
@@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX64-NEXT:    rldic 3, 3, 2, 30
 ; AIX64-NEXT:    ld 4, 0(1)
 ; AIX64-NEXT:    li 5, -64
-; AIX64-NEXT:    addi 3, 3, 15
-; AIX64-NEXT:    mr 31, 1
-; AIX64-NEXT:    rldicl 3, 3, 60, 4
-; AIX64-NEXT:    rldicl 3, 3, 4, 29
 ; AIX64-NEXT:    neg 3, 3
+; AIX64-NEXT:    mr 31, 1
 ; AIX64-NEXT:    and 5, 3, 5
 ; AIX64-NEXT:    stdux 4, 1, 5
 ; AIX64-NEXT:    addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index d1129b1825aee..8653ca997c4ed 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    li r4, -2048
 ; CHECK-LE-NEXT:    li r6, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
-; CHECK-LE-NEXT:    addi r3, r3, 15
-; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
-; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index b887bbb8c99f8..15d0cd5e9baee 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
-; CHECK-LE-NEXT:    clrldi r3, r3, 32
 ; CHECK-LE-NEXT:    lis r5, 1
 ; CHECK-LE-NEXT:    mr r31, r1
-; CHECK-LE-NEXT:    li r6, 1
 ; CHECK-LE-NEXT:    sldi r4, r4, 2
-; CHECK-LE-NEXT:    addi r3, r3, 15
+; CHECK-LE-NEXT:    li r6, 1
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
 ; CHECK-LE-NEXT:    ori r5, r5, 0
-; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    add r5, r31, r5
-; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    stwx r6, r5, r4
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    li r4, -32768
 ; CHECK-LE-NEXT:    li r6, -4096
-; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
 ; CHECK-LE-NEXT:    mr r5, r4
@@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r31, -8
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
-; CHECK-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-BE-NEXT:    lis r5, 1
-; CHECK-BE-NEXT:    addi r3, r3, 15
 ; CHECK-BE-NEXT:    mr r31, r1
 ; CHECK-BE-NEXT:    ori r5, r5, 0
-; CHECK-BE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-BE-NEXT:    add r5, r31, r5
 ; CHECK-BE-NEXT:    sldi r4, r4, 2
 ; CHECK-BE-NEXT:    li r6, 1
-; CHECK-BE-NEXT:    rldicl r3, r3, 4, 31
+; CHECK-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-BE-NEXT:    stwx r6, r5, r4
 ; CHECK-BE-NEXT:    neg r7, r3
 ; CHECK-BE-NEXT:    li r4, -32768
@@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-32-NEXT:    lis r4, 1
 ; CHECK-32-NEXT:    mr r31, r1
 ; CHECK-32-NEXT:    ori r4, r4, 0
-; CHECK-32-NEXT:    addi r3, r3, 15
 ; CHECK-32-NEXT:    add r4, r31, r4
 ; CHECK-32-NEXT:    li r5, 1
 ; CHECK-32-NEXT:    slwi r6, r6, 2
-; CHECK-32-NEXT:    rlwinm r3, r3, 0, 0, 27
 ; CHECK-32-NEXT:    neg r7, r3
 ; CHECK-32-NEXT:    stwx r5, r4, r6
 ; CHECK-32-NEXT:    li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..07daca9c7851e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    .cfi_def_cfa s0, 0
 ; RV64I-NEXT:    andi sp, sp, -64
 ; RV64I-NEXT:    mv s1, sp
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -64
 ; RV64I-NEXT:    lui a2, 1
@@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    .cfi_def_cfa s0, 0
 ; RV32I-NEXT:    andi sp, sp, -64
 ; RV32I-NEXT:    mv s1, sp
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -64
 ; RV32I-NEXT:    lui a1, 1
@@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    srli a2, sp, 13
 ; RV64I-NEXT:    slli sp, a2, 13
 ; RV64I-NEXT:    mv s1, sp
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    lui a2, 1048574
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    lui a2, 1048574
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    srli a1, sp, 13
 ; RV32I-NEXT:    slli sp, a1, 13
 ; RV32I-NEXT:    mv s1, sp
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    lui a1, 1048574
-; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    lui a1, 1048574
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index b1c0755c36ec1..70c082026bda8 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,8 +642,6 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    sw a2, 0(a1)
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -2048
 ; RV64I-NEXT:    lui a1, 1
@@ -698,11 +696,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV32I-NEXT:    add a2, s1, a2
 ; RV32I-NEXT:    add a1, a2, a1
 ; RV32I-NEXT:    li a2, 1
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    andi a0, a0, -16
-; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -2048
+; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB11_3: # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index a3dcc3779f4e6..3c469ee2f3a59 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
-; CHECK32-NEXT:    add %sp, 80, %i1
+; CHECK32-NEXT:    add %sp, 84, %i1
 ; CHECK32-NEXT:    and %i1, -64, %o0
 ; CHECK32-NEXT:    add %o0, -96, %sp
 ; CHECK32-NEXT:    add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK64-LABEL: variable_alloca_with_overalignment:
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
-; CHECK64-NEXT:    add %sp, 2159, %i1
+; CHECK64-NEXT:    add %sp, 2171, %i1
 ; CHECK64-NEXT:    and %i1, -64, %o0
 ; CHECK64-NEXT:    add %o0, -2175, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
@@ -52,8 +52,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment_2:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
-; CHECK32-NEXT:    add %i0, 7, %i0
-; CHECK32-NEXT:    and %i0, -8, %i0
 ; CHECK32-NEXT:    sub %sp, %i0, %i0
 ; CHECK32-NEXT:    add %i0, 88, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
@@ -67,14 +65,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
-; CHECK64-NEXT:    add %i0, 15, %i0
-; CHECK64-NEXT:    sethi 4194303, %i1
-; CHECK64-NEXT:    or %i1, 1008, %i1
-; CHECK64-NEXT:    sethi 0, %i2
-; CHECK64-NEXT:    or %i2, 1, %i2
-; CHECK64-NEXT:    sllx %i2, 32, %i2
-; CHECK64-NEXT:    or %i2, %i1, %i1
-; CHECK64-NEXT:    and %i0, %i1, %i0
 ; CHECK64-NEXT:    sub %sp, %i0, %i0
 ; CHECK64-NEXT:    add %i0, 2175, %i0
 ; CHECK64-NEXT:    and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index 18bb052b47d97..fffec42c2bbb9 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
 ; CHECK32:       ! %bb.0: ! %entry
 ; CHECK32-NEXT:    save %sp, -96, %sp
 ; CHECK32-NEXT:    ld [%fp+92], %o0
-; CHECK32-NEXT:    add %sp, 80, %i0
+; CHECK32-NEXT:    add %sp, 84, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
 ; CHECK32-NEXT:    call stack_realign_helper
 ; CHECK32-NEXT:    add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
 ; CHECK64-LABEL: stack_realign:
 ; CHECK64:       ! %bb.0: ! %entry
 ; CHECK64-NEXT:    save %sp, -128, %sp
-; CHECK64-NEXT:    add %sp, 2159, %i0
+; CHECK64-NEXT:    add %sp, 2171, %i0
 ; CHECK64-NEXT:    and %i0, -64, %o1
 ; CHECK64-NEXT:    add %o1, -2175, %sp
 ; CHECK64-NEXT:    add %sp, -48, %sp
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index e331bfbfb8ab3..3ad687df50295 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -114,7 +114,7 @@ define void @f5() {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    aghi %r1, -128
+; CHECK-NEXT:    aghi %r1, -124
 ; CHECK-NEXT:    la %r2, 280(%r1)
 ; CHECK-NEXT:    nill %r2, 65408
 ; CHECK-NEXT:    lgr %r15, %r1
diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
index a1002c540c14f..99c4a9fcdd8b1 100644
--- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
+++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
@@ -7,12 +7,11 @@ define void @test(i64 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s2, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
+; CHECK-NEXT:    lea %s0, __ve_grow_stack_align@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s0)
 ; CHECK-NEXT:    or %s1, -32, (0)1
+; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    lea %s0, 240(, %s11)
 ; CHECK-NEXT:    lea %s0, 31(, %s0)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
index d90c0bcf9f837..4121e9507bad3 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
@@ -407,10 +407,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    ld1b.zx %s0, (, %s0)
-; CHECK-NEXT:    st1b %s0, 272(, %s17)
-; CHECK-NEXT:    lea %s0, 15(, %s1)
-; CHECK-NEXT:    and %s0, -16, %s0
+; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    st1b %s2, 272(, %s17)
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -447,10 +446,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
 ; CHECKFP-NEXT:    monc
 ; CHECKFP-NEXT:    or %s0, 0, %s62
 ; CHECKFP-NEXT:  .LBB6_2:
-; CHECKFP-NEXT:    ld1b.zx %s0, (, %s0)
-; CHECKFP-NEXT:    st1b %s0, 272(, %s17)
-; CHECKFP-NEXT:    lea %s0, 15(, %s1)
-; CHECKFP-NEXT:    and %s0, -16, %s0
+; CHECKFP-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECKFP-NEXT:    or %s0, 0, %s1
+; CHECKFP-NEXT:    st1b %s2, 272(, %s17)
 ; CHECKFP-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECKFP-NEXT:    and %s1, %s1, (32)0
 ; CHECKFP-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
index 3a3b1ba1544c4..02a1298141265 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
@@ -180,8 +180,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -227,8 +225,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT:    lea %s0, 15(, %s0)
-; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
@@ -447,8 +443,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB5_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -499,8 +493,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT:    lea %s0, 15(, %s0)
-; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
index f9308a172ad05..20affad0fc7bd 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
@@ -103,8 +103,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    or %s2, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -149,8 +147,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT:    lea %s0, 15(, %s0)
-; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
@@ -300,8 +296,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB5_2:
 ; CHECK-NEXT:    or %s2, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -346,8 +340,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
-; PIC-NEXT:    lea %s0, 15(, %s0)
-; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 2c39560f02d16..2bd1c69bc521d 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -25,8 +25,6 @@ define i32 @clober_bp() {
 ; CHECK-NEXT:    .cfi_offset %edi, -12
 ; CHECK-NEXT:    movl $4, 12(%esi)
 ; CHECK-NEXT:    movl 12(%esi), %eax
-; CHECK-NEXT:    addl $3, %eax
-; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
@@ -78,8 +76,6 @@ define i32 @clobber_bpfp() {
 ; CHECK-NEXT:    .cfi_offset %edi, -12
 ; CHECK-NEXT:    movl $4, 12(%esi)
 ; CHECK-NEXT:    movl 12(%esi), %eax
-; CHECK-NEXT:    addl $3, %eax
-; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll
index 591a33446d4e3..0cdbd5721e6b1 100644
--- a/llvm/test/CodeGen/X86/pr50782.ll
+++ b/llvm/test/CodeGen/X86/pr50782.ll
@@ -25,8 +25,7 @@ define void @h(float %i) {
 ; CHECK-NEXT:    .cfi_offset %esi, -12
 ; CHECK-NEXT:    flds 8(%ebp)
 ; CHECK-NEXT:    movl _a, %ecx
-; CHECK-NEXT:    leal 3(%ecx), %eax
-; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
index ccf7e1d56da90..dc2503ecece91 100644
--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
@@ -99,8 +99,7 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
 ; CHECK-NEXT:    movl $1, (%rbx,%rdi,4)
 ; CHECK-NEXT:    movl (%rbx), %ecx
 ; CHECK-NEXT:    movq %rsp, %rax
-; CHECK-NEXT:    leaq 15(,%rcx,4), %rcx
-; CHECK-NEXT:    andq $-16, %rcx
+; CHECK-NEXT:    shlq $2, %rcx
 ; CHECK-NEXT:    subq %rcx, %rax
 ; CHECK-NEXT:    cmpq %rsp, %rax
 ; CHECK-NEXT:    jge .LBB3_3
diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
index 241188b8cc3d5..d636896467b00 100644
--- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -95,28 +95,24 @@ entry:
 
   %buf1 = alloca i8, i64 %n, align 128
 
-; M64: leaq  15(%{{.*}}), %rax
-; M64: andq  $-16, %rax
+; M64: movq  %rcx, %rax
 ; M64: callq ___chkstk_ms
 ; M64: subq  %rax, %rsp
 ; M64: movq  %rsp, [[R2:%r.*]]
 ; M64: andq  $-128, [[R2]]
 ; M64: movq  [[R2]], %rsp
 
-; W64: leaq  15(%{{.*}}), %rax
-; W64: andq  $-16, %rax
+; W64: movq  %rcx, %rax
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, [[R2:%r.*]]
 ; W64: andq  $-128, [[R2]]
 ; W64: movq  [[R2]], %rsp
 
-; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
-; EFI: andq  $-16, [[R1]]
-; EFI: movq  %rsp, [[R64:%r.*]]
-; EFI: subq  [[R1]], [[R64]]
-; EFI: andq  $-128, [[R64]]
-; EFI: movq  [[R64]], %rsp
+; EFI: movq  %rsp, %rax 
+; EFI: subq  %rcx, %rax
+; EFI: andq  $-128, %rax 
+; EFI: movq  %rax, %rsp
 
   %r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind
 
@@ -129,7 +125,7 @@ entry:
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
-; EFI: movq  [[R64]], 32(%rsp)
+; EFI: movq  %rax, 32(%rsp)
 ; EFI: callq _bar
 
   ret i64 %r

>From 5ca41197b3651b9d30bcd62affc2fadca753abb7 Mon Sep 17 00:00:00 2001
From: Jonathan Cogan <jcogan at nvidia.com>
Date: Mon, 24 Mar 2025 09:18:03 +0000
Subject: [PATCH 2/2] Revert changes to SelectionDAG.

---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 43 +++++++------------
 .../CodeGen/AArch64/sme-framelower-use-bp.ll  |  6 ++-
 .../CodeGen/AArch64/stack-probing-dynamic.ll  | 40 ++++++++++++-----
 llvm/test/CodeGen/AArch64/sve-alloca.ll       |  4 +-
 .../PowerPC/aix-framepointer-save-restore.ll  |  9 +++-
 llvm/test/CodeGen/PowerPC/pr46759.ll          |  3 ++
 .../CodeGen/PowerPC/stack-clash-prologue.ll   | 16 +++++--
 .../RISCV/rvv/stack-probing-dynamic.ll        | 12 +++++-
 .../CodeGen/RISCV/stack-clash-prologue.ll     |  6 ++-
 llvm/test/CodeGen/SPARC/alloca-align.ll       | 14 +++++-
 llvm/test/CodeGen/SPARC/stack-align.ll        |  4 +-
 llvm/test/CodeGen/SystemZ/alloca-03.ll        |  2 +-
 llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll |  9 ++--
 .../CodeGen/VE/Scalar/stackframe_align.ll     | 14 +++---
 .../test/CodeGen/VE/Scalar/stackframe_call.ll |  8 ++++
 .../CodeGen/VE/Scalar/stackframe_nocall.ll    |  8 ++++
 llvm/test/CodeGen/X86/clobber_base_ptr.ll     |  4 ++
 llvm/test/CodeGen/X86/pr50782.ll              |  3 +-
 .../stack-clash-small-alloc-medium-align.ll   |  3 +-
 .../CodeGen/X86/win64_alloca_dynalloca.ll     | 18 +++++---
 20 files changed, 154 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe1a8bab30e54..14bb1d943d2d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,35 +4447,24 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
                             DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
   }
 
-  // Handle alignment. If the requested alignment is less than or equal to the
-  // stack alignment, ignore it since we will align the size. If the size is
-  // greater than or equal to the stack alignment, we note this in the
-  // DYNAMIC_STACKALLOC node.
-  const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
-  Align StackAlign = TFI->getStackAlign();
-  bool IsUnderAligned = *Alignment <= StackAlign;
-  if (IsUnderAligned)
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If the size is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
+  if (*Alignment <= StackAlign)
     Alignment = std::nullopt;
 
-  // If the stack grows up, adding the alloca's size to SP without padding may
-  // leave SP not aligned (to the stack alignment) after the alloca because we
-  // align SP (to the stack align or alloca align) *before* adding the alloca
-  // size. On the other hand, if the stack grows down, we will align SP *after*
-  // decrementing it, so there is no need to align the size.
-  if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
-      IsUnderAligned) {
-    const uint64_t StackAlignMask = StackAlign.value() - 1U;
-    // Round the size of the allocation up to the stack alignment size
-    // by add SA-1 to the size. This doesn't overflow because we're computing
-    // an address inside an alloca.
-    AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
-                            DAG.getConstant(StackAlignMask, dl, IntPtr),
-                            SDNodeFlags::NoUnsignedWrap);
-
-    // Mask out the low bits for alignment purposes.
-    AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
-                            DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
-  }
+  const uint64_t StackAlignMask = StackAlign.value() - 1U;
+  // Round the size of the allocation up to the stack alignment size
+  // by add SA-1 to the size. This doesn't overflow because we're computing
+  // an address inside an alloca.
+  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+                          DAG.getConstant(StackAlignMask, dl, IntPtr),
+                          SDNodeFlags::NoUnsignedWrap);
+
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+                          DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
 
   SDValue Ops[] = {
       getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index 85c6b2a2fb854..f49bb910b5bd1 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,7 +160,11 @@ define void @quux() #1 {
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    subs x9, x9, #16
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    rdvl x9, #2
+; CHECK-NEXT:    addvl x9, x8, #2
+; CHECK-NEXT:    mov w0, w9
+; CHECK-NEXT:    // implicit-def: $x9
+; CHECK-NEXT:    mov w9, w0
+; CHECK-NEXT:    and x9, x9, #0x7f0
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    subs x10, x10, x9
 ; CHECK-NEXT:    and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index b32eb210ce0e7..ee74469c0a76a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs                                   | FileCheck %s
-; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s
+; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs                                   | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; Dynamically-sized allocation, needs a loop which can handle any size at
 ; runtime. The final iteration of the loop will temporarily put SP below the
@@ -107,10 +107,20 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    sub x9, sp, #32
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    str xzr, [sp]
-; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x0
+;
+; CHECK-SD-NEXT: add x9, x0, #15
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-DAG:  str xzr, [sp]
+; CHECK-SD-DAG:  and x9, x9, #0xfffffffffffffff0
+; CHECK-SD-NOT:  INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-SD-DAG:  mov x19, sp
+; CHECK-SD-DAG:  sub x8, x8, x9
+;
+; CHECK-GI-NEXT: mov x8, sp
+; CHECK-GI-NEXT: str xzr, [sp]
+; CHECK-GI-DAG:  mov x19, sp
+; CHECK-GI-DAG:  sub x8, x8, x0
+;
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffffc0
 ; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
@@ -164,10 +174,20 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    b .LBB3_1
 ; CHECK-NEXT:  .LBB3_3:
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ldr xzr, [sp]
-; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x0
+;
+; CHECK-SD-NEXT: add x9, x0, #15
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-DAG:  ldr xzr, [sp]
+; CHECK-SD-DAG:  and x9, x9, #0xfffffffffffffff0
+; CHECK-SD-NOT:  INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-SD-DAG:  mov x19, sp
+; CHECK-SD-DAG:  sub x8, x8, x9
+;
+; CHECK-GI-NEXT: mov x8, sp
+; CHECK-GI-NEXT: ldr xzr, [sp]
+; CHECK-GI-DAG:  mov x19, sp
+; CHECK-GI-DAG:  sub x8, x8, x0
+;
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffe000
 ; CHECK-NEXT:  .LBB3_4: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 3d3c5da483015..2520095cce62e 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,8 +54,10 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
+; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cnth x9, all, mul #4
+; CHECK-NEXT:    add x9, x9, #15
+; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
 ; CHECK-NEXT:    sub x8, x8, x9
 ; CHECK-NEXT:    and x0, x8, #0xffffffffffffffe0
 ; CHECK-NEXT:    mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 8895a9a920569..44281bcc3647d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,8 +82,10 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX32-NEXT:    slwi 3, 3, 2
 ; AIX32-NEXT:    lwz 4, 0(1)
 ; AIX32-NEXT:    li 5, -64
-; AIX32-NEXT:    neg 3, 3
+; AIX32-NEXT:    addi 3, 3, 15
 ; AIX32-NEXT:    mr 31, 1
+; AIX32-NEXT:    rlwinm 3, 3, 0, 0, 27
+; AIX32-NEXT:    neg 3, 3
 ; AIX32-NEXT:    and 5, 3, 5
 ; AIX32-NEXT:    stwux 4, 1, 5
 ; AIX32-NEXT:    addi 3, 1, 64
@@ -109,8 +111,11 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX64-NEXT:    rldic 3, 3, 2, 30
 ; AIX64-NEXT:    ld 4, 0(1)
 ; AIX64-NEXT:    li 5, -64
-; AIX64-NEXT:    neg 3, 3
+; AIX64-NEXT:    addi 3, 3, 15
 ; AIX64-NEXT:    mr 31, 1
+; AIX64-NEXT:    rldicl 3, 3, 60, 4
+; AIX64-NEXT:    rldicl 3, 3, 4, 29
+; AIX64-NEXT:    neg 3, 3
 ; AIX64-NEXT:    and 5, 3, 5
 ; AIX64-NEXT:    stdux 4, 1, 5
 ; AIX64-NEXT:    addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 8653ca997c4ed..d1129b1825aee 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,6 +33,9 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    li r4, -2048
 ; CHECK-LE-NEXT:    li r6, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
+; CHECK-LE-NEXT:    addi r3, r3, 15
+; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
+; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index 15d0cd5e9baee..b887bbb8c99f8 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,17 +835,20 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
 ; CHECK-LE-NEXT:    lis r5, 1
 ; CHECK-LE-NEXT:    mr r31, r1
-; CHECK-LE-NEXT:    sldi r4, r4, 2
 ; CHECK-LE-NEXT:    li r6, 1
-; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    sldi r4, r4, 2
+; CHECK-LE-NEXT:    addi r3, r3, 15
 ; CHECK-LE-NEXT:    ori r5, r5, 0
+; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    add r5, r31, r5
+; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    stwx r6, r5, r4
-; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    li r4, -32768
 ; CHECK-LE-NEXT:    li r6, -4096
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
 ; CHECK-LE-NEXT:    mr r5, r4
@@ -893,13 +896,16 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r31, -8
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
+; CHECK-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-BE-NEXT:    lis r5, 1
+; CHECK-BE-NEXT:    addi r3, r3, 15
 ; CHECK-BE-NEXT:    mr r31, r1
 ; CHECK-BE-NEXT:    ori r5, r5, 0
+; CHECK-BE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-BE-NEXT:    add r5, r31, r5
 ; CHECK-BE-NEXT:    sldi r4, r4, 2
 ; CHECK-BE-NEXT:    li r6, 1
-; CHECK-BE-NEXT:    clrldi r3, r3, 32
+; CHECK-BE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-BE-NEXT:    stwx r6, r5, r4
 ; CHECK-BE-NEXT:    neg r7, r3
 ; CHECK-BE-NEXT:    li r4, -32768
@@ -958,9 +964,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-32-NEXT:    lis r4, 1
 ; CHECK-32-NEXT:    mr r31, r1
 ; CHECK-32-NEXT:    ori r4, r4, 0
+; CHECK-32-NEXT:    addi r3, r3, 15
 ; CHECK-32-NEXT:    add r4, r31, r4
 ; CHECK-32-NEXT:    li r5, 1
 ; CHECK-32-NEXT:    slwi r6, r6, 2
+; CHECK-32-NEXT:    rlwinm r3, r3, 0, 0, 27
 ; CHECK-32-NEXT:    neg r7, r3
 ; CHECK-32-NEXT:    stwx r5, r4, r6
 ; CHECK-32-NEXT:    li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index 07daca9c7851e..c3c1643e6de01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,6 +180,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    .cfi_def_cfa s0, 0
 ; RV64I-NEXT:    andi sp, sp, -64
 ; RV64I-NEXT:    mv s1, sp
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -64
 ; RV64I-NEXT:    lui a2, 1
@@ -217,6 +219,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    .cfi_def_cfa s0, 0
 ; RV32I-NEXT:    andi sp, sp, -64
 ; RV32I-NEXT:    mv s1, sp
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -64
 ; RV32I-NEXT:    lui a1, 1
@@ -274,8 +278,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    srli a2, sp, 13
 ; RV64I-NEXT:    slli sp, a2, 13
 ; RV64I-NEXT:    mv s1, sp
-; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    addi a0, a0, 15
 ; RV64I-NEXT:    lui a2, 1048574
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -323,8 +329,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    srli a1, sp, 13
 ; RV32I-NEXT:    slli sp, a1, 13
 ; RV32I-NEXT:    mv s1, sp
-; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 15
 ; RV32I-NEXT:    lui a1, 1048574
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index 70c082026bda8..b1c0755c36ec1 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,6 +642,8 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -2048
 ; RV64I-NEXT:    lui a1, 1
@@ -696,9 +698,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV32I-NEXT:    add a2, s1, a2
 ; RV32I-NEXT:    add a1, a2, a1
 ; RV32I-NEXT:    li a2, 1
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -2048
-; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB11_3: # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index 3c469ee2f3a59..a3dcc3779f4e6 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
-; CHECK32-NEXT:    add %sp, 84, %i1
+; CHECK32-NEXT:    add %sp, 80, %i1
 ; CHECK32-NEXT:    and %i1, -64, %o0
 ; CHECK32-NEXT:    add %o0, -96, %sp
 ; CHECK32-NEXT:    add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK64-LABEL: variable_alloca_with_overalignment:
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
-; CHECK64-NEXT:    add %sp, 2171, %i1
+; CHECK64-NEXT:    add %sp, 2159, %i1
 ; CHECK64-NEXT:    and %i1, -64, %o0
 ; CHECK64-NEXT:    add %o0, -2175, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
@@ -52,6 +52,8 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment_2:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
+; CHECK32-NEXT:    add %i0, 7, %i0
+; CHECK32-NEXT:    and %i0, -8, %i0
 ; CHECK32-NEXT:    sub %sp, %i0, %i0
 ; CHECK32-NEXT:    add %i0, 88, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
@@ -65,6 +67,14 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
+; CHECK64-NEXT:    add %i0, 15, %i0
+; CHECK64-NEXT:    sethi 4194303, %i1
+; CHECK64-NEXT:    or %i1, 1008, %i1
+; CHECK64-NEXT:    sethi 0, %i2
+; CHECK64-NEXT:    or %i2, 1, %i2
+; CHECK64-NEXT:    sllx %i2, 32, %i2
+; CHECK64-NEXT:    or %i2, %i1, %i1
+; CHECK64-NEXT:    and %i0, %i1, %i0
 ; CHECK64-NEXT:    sub %sp, %i0, %i0
 ; CHECK64-NEXT:    add %i0, 2175, %i0
 ; CHECK64-NEXT:    and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index fffec42c2bbb9..18bb052b47d97 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
 ; CHECK32:       ! %bb.0: ! %entry
 ; CHECK32-NEXT:    save %sp, -96, %sp
 ; CHECK32-NEXT:    ld [%fp+92], %o0
-; CHECK32-NEXT:    add %sp, 84, %i0
+; CHECK32-NEXT:    add %sp, 80, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
 ; CHECK32-NEXT:    call stack_realign_helper
 ; CHECK32-NEXT:    add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
 ; CHECK64-LABEL: stack_realign:
 ; CHECK64:       ! %bb.0: ! %entry
 ; CHECK64-NEXT:    save %sp, -128, %sp
-; CHECK64-NEXT:    add %sp, 2171, %i0
+; CHECK64-NEXT:    add %sp, 2159, %i0
 ; CHECK64-NEXT:    and %i0, -64, %o1
 ; CHECK64-NEXT:    add %o1, -2175, %sp
 ; CHECK64-NEXT:    add %sp, -48, %sp
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index 3ad687df50295..e331bfbfb8ab3 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -114,7 +114,7 @@ define void @f5() {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    aghi %r1, -124
+; CHECK-NEXT:    aghi %r1, -128
 ; CHECK-NEXT:    la %r2, 280(%r1)
 ; CHECK-NEXT:    nill %r2, 65408
 ; CHECK-NEXT:    lgr %r15, %r1
diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
index 99c4a9fcdd8b1..a1002c540c14f 100644
--- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
+++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll
@@ -7,11 +7,12 @@ define void @test(i64 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s2, 0, %s0
-; CHECK-NEXT:    lea %s0, __ve_grow_stack_align@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
+; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
 ; CHECK-NEXT:    or %s1, -32, (0)1
-; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    lea %s0, 240(, %s11)
 ; CHECK-NEXT:    lea %s0, 31(, %s0)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
index 4121e9507bad3..d90c0bcf9f837 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll
@@ -407,9 +407,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
-; CHECK-NEXT:    or %s0, 0, %s1
-; CHECK-NEXT:    st1b %s2, 272(, %s17)
+; CHECK-NEXT:    ld1b.zx %s0, (, %s0)
+; CHECK-NEXT:    st1b %s0, 272(, %s17)
+; CHECK-NEXT:    lea %s0, 15(, %s1)
+; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -446,9 +447,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) {
 ; CHECKFP-NEXT:    monc
 ; CHECKFP-NEXT:    or %s0, 0, %s62
 ; CHECKFP-NEXT:  .LBB6_2:
-; CHECKFP-NEXT:    ld1b.zx %s2, (, %s0)
-; CHECKFP-NEXT:    or %s0, 0, %s1
-; CHECKFP-NEXT:    st1b %s2, 272(, %s17)
+; CHECKFP-NEXT:    ld1b.zx %s0, (, %s0)
+; CHECKFP-NEXT:    st1b %s0, 272(, %s17)
+; CHECKFP-NEXT:    lea %s0, 15(, %s1)
+; CHECKFP-NEXT:    and %s0, -16, %s0
 ; CHECKFP-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECKFP-NEXT:    and %s1, %s1, (32)0
 ; CHECKFP-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
index 02a1298141265..3a3b1ba1544c4 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll
@@ -180,6 +180,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -225,6 +227,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) {
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT:    lea %s0, 15(, %s0)
+; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
@@ -443,6 +447,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
 ; CHECK-NEXT:    monc
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -493,6 +499,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) {
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT:    lea %s0, 15(, %s0)
+; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
index 20affad0fc7bd..f9308a172ad05 100644
--- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
+++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll
@@ -103,6 +103,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    or %s2, 0, %s1
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -147,6 +149,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT:    lea %s0, 15(, %s0)
+; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
@@ -296,6 +300,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
 ; CHECK-NEXT:    or %s0, 0, %s62
 ; CHECK-NEXT:  .LBB5_2:
 ; CHECK-NEXT:    or %s2, 0, %s1
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
 ; CHECK-NEXT:    lea %s1, __ve_grow_stack_align@lo
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
@@ -340,6 +346,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado
 ; PIC-NEXT:    and %s15, %s15, (32)0
 ; PIC-NEXT:    sic %s16
 ; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT:    lea %s0, 15(, %s0)
+; PIC-NEXT:    and %s0, -16, %s0
 ; PIC-NEXT:    lea %s12, __ve_grow_stack_align@plt_lo(-24)
 ; PIC-NEXT:    and %s12, %s12, (32)0
 ; PIC-NEXT:    sic %s16
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 2bd1c69bc521d..2c39560f02d16 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -25,6 +25,8 @@ define i32 @clober_bp() {
 ; CHECK-NEXT:    .cfi_offset %edi, -12
 ; CHECK-NEXT:    movl $4, 12(%esi)
 ; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
@@ -76,6 +78,8 @@ define i32 @clobber_bpfp() {
 ; CHECK-NEXT:    .cfi_offset %edi, -12
 ; CHECK-NEXT:    movl $4, 12(%esi)
 ; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll
index 0cdbd5721e6b1..591a33446d4e3 100644
--- a/llvm/test/CodeGen/X86/pr50782.ll
+++ b/llvm/test/CodeGen/X86/pr50782.ll
@@ -25,7 +25,8 @@ define void @h(float %i) {
 ; CHECK-NEXT:    .cfi_offset %esi, -12
 ; CHECK-NEXT:    flds 8(%ebp)
 ; CHECK-NEXT:    movl _a, %ecx
-; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    leal 3(%ecx), %eax
+; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    calll __alloca
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    andl $-16, %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
index dc2503ecece91..ccf7e1d56da90 100644
--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
@@ -99,7 +99,8 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
 ; CHECK-NEXT:    movl $1, (%rbx,%rdi,4)
 ; CHECK-NEXT:    movl (%rbx), %ecx
 ; CHECK-NEXT:    movq %rsp, %rax
-; CHECK-NEXT:    shlq $2, %rcx
+; CHECK-NEXT:    leaq 15(,%rcx,4), %rcx
+; CHECK-NEXT:    andq $-16, %rcx
 ; CHECK-NEXT:    subq %rcx, %rax
 ; CHECK-NEXT:    cmpq %rsp, %rax
 ; CHECK-NEXT:    jge .LBB3_3
diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
index d636896467b00..241188b8cc3d5 100644
--- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -95,24 +95,28 @@ entry:
 
   %buf1 = alloca i8, i64 %n, align 128
 
-; M64: movq  %rcx, %rax
+; M64: leaq  15(%{{.*}}), %rax
+; M64: andq  $-16, %rax
 ; M64: callq ___chkstk_ms
 ; M64: subq  %rax, %rsp
 ; M64: movq  %rsp, [[R2:%r.*]]
 ; M64: andq  $-128, [[R2]]
 ; M64: movq  [[R2]], %rsp
 
-; W64: movq  %rcx, %rax
+; W64: leaq  15(%{{.*}}), %rax
+; W64: andq  $-16, %rax
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, [[R2:%r.*]]
 ; W64: andq  $-128, [[R2]]
 ; W64: movq  [[R2]], %rsp
 
-; EFI: movq  %rsp, %rax 
-; EFI: subq  %rcx, %rax
-; EFI: andq  $-128, %rax 
-; EFI: movq  %rax, %rsp
+; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
+; EFI: andq  $-16, [[R1]]
+; EFI: movq  %rsp, [[R64:%r.*]]
+; EFI: subq  [[R1]], [[R64]]
+; EFI: andq  $-128, [[R64]]
+; EFI: movq  [[R64]], %rsp
 
   %r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind
 
@@ -125,7 +129,7 @@ entry:
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
-; EFI: movq  %rax, 32(%rsp)
+; EFI: movq  [[R64]], 32(%rsp)
 ; EFI: callq _bar
 
   ret i64 %r



More information about the llvm-commits mailing list