[llvm] [CodeGen] Avoid aligning alloca size. (PR #132064)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 10:21:29 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: None (jcogan-nv)

<details>
<summary>Changes</summary>

GlobalISel and SelectionDAG will emit an ADD and an AND to pad the size of a dynamic `alloca` to a multiple of the stack alignment. When the alignment for the `alloca` is stricter than the stack alignment, and the stack grows down, this is not necessary because we will already clamp the stack pointer to the `alloca`'s alignment after adding the size (for example, see `LegalizerHelper::getDynStackAllocTargetPtr`). This change avoids padding the size of `alloca` when it is not necessary.

---

Patch is 30.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132064.diff


22 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+31-13) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+27-16) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll (+1-5) 
- (modified) llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll (+1-5) 
- (modified) llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll (+4-10) 
- (modified) llvm/test/CodeGen/AArch64/sve-alloca.ll (+1-3) 
- (modified) llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll (+2-7) 
- (modified) llvm/test/CodeGen/PowerPC/pr46759.ll (-3) 
- (modified) llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll (+4-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll (+2-10) 
- (modified) llvm/test/CodeGen/RISCV/stack-clash-prologue.ll (+1-5) 
- (modified) llvm/test/CodeGen/SPARC/alloca-align.ll (+2-12) 
- (modified) llvm/test/CodeGen/SPARC/stack-align.ll (+2-2) 
- (modified) llvm/test/CodeGen/SystemZ/alloca-03.ll (+1-1) 
- (modified) llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll (+4-5) 
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_align.ll (+6-8) 
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_call.ll (-8) 
- (modified) llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll (-8) 
- (modified) llvm/test/CodeGen/X86/clobber_base_ptr.ll (-4) 
- (modified) llvm/test/CodeGen/X86/pr50782.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll (+7-11) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b85239ebf08cb..d55096af8836c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U,
       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
 
-  // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size. This doesn't overflow because we're computing
-  // an address inside an alloca.
-  Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
-  auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
-  auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
-                                      MachineInstr::NoUWrap);
-  auto AlignCst =
-      MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
-  auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
-
+  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+  Align StackAlign = TFI->getStackAlign();
   Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
-  if (Alignment <= StackAlign)
+
+  // If the stack alignment is stricter than the alloca's alignment, ignore the
+  // alloca's alignment. We will align the size of the alloca to the stack
+  // alignment, which will guarantee that the alloca's alignment is satisfied.
+  bool IsUnderAligned = Alignment <= StackAlign;
+  if (IsUnderAligned)
     Alignment = Align(1);
-  MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+
+  // If the stack grows up, adding the alloca's size to SP without padding may
+  // leave SP not aligned (to the stack alignment) after the alloca because we
+  // align SP (to the stack align or alloca align) *before* adding the alloca
+  // size. On the other hand, if the stack grows down, we will align SP *after*
+  // decrementing it, so there is no need to pad the size.
+  if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+      IsUnderAligned) {
+    // Round the size of the allocation up to the stack alignment size
+    // by add SA-1 to the size. This doesn't overflow because we're computing
+    // an address inside an alloca.
+    auto SAMinusOne =
+        MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
+    auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+                                        MachineInstr::NoUWrap);
+    auto AlignCst =
+        MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
+    auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
+
+    MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+  } else {
+    MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment);
+  }
 
   MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
   assert(MF->getFrameInfo().hasVarSizedObjects());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 14bb1d943d2d6..fe1a8bab30e54 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
                             DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
   }
 
-  // Handle alignment.  If the requested alignment is less than or equal to
-  // the stack alignment, ignore it.  If the size is greater than or equal to
-  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
-  if (*Alignment <= StackAlign)
+  // Handle alignment. If the requested alignment is less than or equal to the
+  // stack alignment, ignore it since we will align the size. If the size is
+  // greater than or equal to the stack alignment, we note this in the
+  // DYNAMIC_STACKALLOC node.
+  const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
+  Align StackAlign = TFI->getStackAlign();
+  bool IsUnderAligned = *Alignment <= StackAlign;
+  if (IsUnderAligned)
     Alignment = std::nullopt;
 
-  const uint64_t StackAlignMask = StackAlign.value() - 1U;
-  // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size. This doesn't overflow because we're computing
-  // an address inside an alloca.
-  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
-                          DAG.getConstant(StackAlignMask, dl, IntPtr),
-                          SDNodeFlags::NoUnsignedWrap);
-
-  // Mask out the low bits for alignment purposes.
-  AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
-                          DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+  // If the stack grows up, adding the alloca's size to SP without padding may
+  // leave SP not aligned (to the stack alignment) after the alloca because we
+  // align SP (to the stack align or alloca align) *before* adding the alloca
+  // size. On the other hand, if the stack grows down, we will align SP *after*
+  // decrementing it, so there is no need to align the size.
+  if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
+      IsUnderAligned) {
+    const uint64_t StackAlignMask = StackAlign.value() - 1U;
+    // Round the size of the allocation up to the stack alignment size
+    // by add SA-1 to the size. This doesn't overflow because we're computing
+    // an address inside an alloca.
+    AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+                            DAG.getConstant(StackAlignMask, dl, IntPtr),
+                            SDNodeFlags::NoUnsignedWrap);
+
+    // Mask out the low bits for alignment purposes.
+    AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+                            DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
+  }
 
   SDValue Ops[] = {
       getRoot(), AllocSize,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
index 88eaa1382d1d6..0f74b51262339 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
@@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) {
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
   ; CHECK:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
-  ; CHECK:   [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
-  ; CHECK:   [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32
+  ; CHECK:   [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32
   ; CHECK:   $x0 = COPY [[DYN_STACKALLOC]](p0)
   ; CHECK:   RET_ReallyLR implicit $x0
   %addr = alloca i8, i32 %numelts, align 32
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index f49bb910b5bd1..85c6b2a2fb854 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -160,11 +160,7 @@ define void @quux() #1 {
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    subs x9, x9, #16
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    addvl x9, x8, #2
-; CHECK-NEXT:    mov w0, w9
-; CHECK-NEXT:    // implicit-def: $x9
-; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    and x9, x9, #0x7f0
+; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    subs x10, x10, x9
 ; CHECK-NEXT:    and x10, x10, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index e7687f0d3994b..b32eb210ce0e7 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    sub x9, sp, #32
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffc0
-; CHECK-NEXT:    add x9, x0, #15
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-DAG:     str xzr, [sp]
-; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT:    str xzr, [sp]
 ; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x9
+; CHECK-DAG:     sub x8, x8, x0
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffffc0
 ; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
@@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; CHECK-NEXT:    b .LBB3_1
 ; CHECK-NEXT:  .LBB3_3:
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    add x9, x0, #15
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-DAG:     ldr xzr, [sp]
-; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0
-; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG
+; CHECK-NEXT:    ldr xzr, [sp]
 ; CHECK-DAG:     mov x19, sp
-; CHECK-DAG:     sub x8, x8, x9
+; CHECK-DAG:     sub x8, x8, x0
 ; CHECK-NEXT:    and x8, x8, #0xffffffffffffe000
 ; CHECK-NEXT:  .LBB3_4: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 2520095cce62e..3d3c5da483015 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -54,10 +54,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    add x9, x9, #15
-; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT:    cnth x9, all, mul #4
 ; CHECK-NEXT:    sub x8, x8, x9
 ; CHECK-NEXT:    and x0, x8, #0xffffffffffffffe0
 ; CHECK-NEXT:    mov sp, x0
diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index 44281bcc3647d..8895a9a920569 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX32-NEXT:    slwi 3, 3, 2
 ; AIX32-NEXT:    lwz 4, 0(1)
 ; AIX32-NEXT:    li 5, -64
-; AIX32-NEXT:    addi 3, 3, 15
-; AIX32-NEXT:    mr 31, 1
-; AIX32-NEXT:    rlwinm 3, 3, 0, 0, 27
 ; AIX32-NEXT:    neg 3, 3
+; AIX32-NEXT:    mr 31, 1
 ; AIX32-NEXT:    and 5, 3, 5
 ; AIX32-NEXT:    stwux 4, 1, 5
 ; AIX32-NEXT:    addi 3, 1, 64
@@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
 ; AIX64-NEXT:    rldic 3, 3, 2, 30
 ; AIX64-NEXT:    ld 4, 0(1)
 ; AIX64-NEXT:    li 5, -64
-; AIX64-NEXT:    addi 3, 3, 15
-; AIX64-NEXT:    mr 31, 1
-; AIX64-NEXT:    rldicl 3, 3, 60, 4
-; AIX64-NEXT:    rldicl 3, 3, 4, 29
 ; AIX64-NEXT:    neg 3, 3
+; AIX64-NEXT:    mr 31, 1
 ; AIX64-NEXT:    and 5, 3, 5
 ; AIX64-NEXT:    stdux 4, 1, 5
 ; AIX64-NEXT:    addi 3, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index d1129b1825aee..8653ca997c4ed 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    li r4, -2048
 ; CHECK-LE-NEXT:    li r6, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
-; CHECK-LE-NEXT:    addi r3, r3, 15
-; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
-; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index b887bbb8c99f8..15d0cd5e9baee 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
-; CHECK-LE-NEXT:    clrldi r3, r3, 32
 ; CHECK-LE-NEXT:    lis r5, 1
 ; CHECK-LE-NEXT:    mr r31, r1
-; CHECK-LE-NEXT:    li r6, 1
 ; CHECK-LE-NEXT:    sldi r4, r4, 2
-; CHECK-LE-NEXT:    addi r3, r3, 15
+; CHECK-LE-NEXT:    li r6, 1
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
 ; CHECK-LE-NEXT:    ori r5, r5, 0
-; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    add r5, r31, r5
-; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    stwx r6, r5, r4
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    li r4, -32768
 ; CHECK-LE-NEXT:    li r6, -4096
-; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
 ; CHECK-LE-NEXT:    and r4, r5, r4
 ; CHECK-LE-NEXT:    mr r5, r4
@@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r31, -8
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
-; CHECK-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-BE-NEXT:    lis r5, 1
-; CHECK-BE-NEXT:    addi r3, r3, 15
 ; CHECK-BE-NEXT:    mr r31, r1
 ; CHECK-BE-NEXT:    ori r5, r5, 0
-; CHECK-BE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-BE-NEXT:    add r5, r31, r5
 ; CHECK-BE-NEXT:    sldi r4, r4, 2
 ; CHECK-BE-NEXT:    li r6, 1
-; CHECK-BE-NEXT:    rldicl r3, r3, 4, 31
+; CHECK-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-BE-NEXT:    stwx r6, r5, r4
 ; CHECK-BE-NEXT:    neg r7, r3
 ; CHECK-BE-NEXT:    li r4, -32768
@@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-32-NEXT:    lis r4, 1
 ; CHECK-32-NEXT:    mr r31, r1
 ; CHECK-32-NEXT:    ori r4, r4, 0
-; CHECK-32-NEXT:    addi r3, r3, 15
 ; CHECK-32-NEXT:    add r4, r31, r4
 ; CHECK-32-NEXT:    li r5, 1
 ; CHECK-32-NEXT:    slwi r6, r6, 2
-; CHECK-32-NEXT:    rlwinm r3, r3, 0, 0, 27
 ; CHECK-32-NEXT:    neg r7, r3
 ; CHECK-32-NEXT:    stwx r5, r4, r6
 ; CHECK-32-NEXT:    li r4, -32768
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..07daca9c7851e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    .cfi_def_cfa s0, 0
 ; RV64I-NEXT:    andi sp, sp, -64
 ; RV64I-NEXT:    mv s1, sp
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -64
 ; RV64I-NEXT:    lui a2, 1
@@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    .cfi_def_cfa s0, 0
 ; RV32I-NEXT:    andi sp, sp, -64
 ; RV32I-NEXT:    mv s1, sp
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -64
 ; RV32I-NEXT:    lui a1, 1
@@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV64I-NEXT:    srli a2, sp, 13
 ; RV64I-NEXT:    slli sp, a2, 13
 ; RV64I-NEXT:    mv s1, sp
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    lui a2, 1048574
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    lui a2, 1048574
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; RV32I-NEXT:    srli a1, sp, 13
 ; RV32I-NEXT:    slli sp, a1, 13
 ; RV32I-NEXT:    mv s1, sp
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    lui a1, 1048574
-; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    lui a1, 1048574
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index b1c0755c36ec1..70c082026bda8 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -642,8 +642,6 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    sw a2, 0(a1)
-; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    andi a0, a0, -2048
 ; RV64I-NEXT:    lui a1, 1
@@ -698,11 +696,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; RV32I-NEXT:    add a2, s1, a2
 ; RV32I-NEXT:    add a1, a2, a1
 ; RV32I-NEXT:    li a2, 1
-; RV32I-NEXT:    addi a0, a0, 15
-; RV32I-NEXT:    andi a0, a0, -16
-; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    sub a0, sp, a0
 ; RV32I-NEXT:    andi a0, a0, -2048
+; RV32I-NEXT:    sw a2, 0(a1)
 ; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB11_3: # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    sub sp, sp, a1
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
index a3dcc3779f4e6..3c469ee2f3a59 100644
--- a/llvm/test/CodeGen/SPARC/alloca-align.ll
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
-; CHECK32-NEXT:    add %sp, 80, %i1
+; CHECK32-NEXT:    add %sp, 84, %i1
 ; CHECK32-NEXT:    and %i1, -64, %o0
 ; CHECK32-NEXT:    add %o0, -96, %sp
 ; CHECK32-NEXT:    add %i0, 7, %i0
@@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind {
 ; CHECK64-LABEL: variable_alloca_with_overalignment:
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
-; CHECK64-NEXT:    add %sp, 2159, %i1
+; CHECK64-NEXT:    add %sp, 2171, %i1
 ; CHECK64-NEXT:    and %i1, -64, %o0
 ; CHECK64-NEXT:    add %o0, -2175, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
@@ -52,8 +52,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK32-LABEL: variable_alloca_with_overalignment_2:
 ; CHECK32:       ! %bb.0:
 ; CHECK32-NEXT:    save %sp, -96, %sp
-; CHECK32-NEXT:    add %i0, 7, %i0
-; CHECK32-NEXT:    and %i0, -8, %i0
 ; CHECK32-NEXT:    sub %sp, %i0, %i0
 ; CHECK32-NEXT:    add %i0, 88, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
@@ -67,14 +65,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
 ; CHECK64:       ! %bb.0:
 ; CHECK64-NEXT:    save %sp, -128, %sp
 ; CHECK64-NEXT:    srl %i0, 0, %i0
-; CHECK64-NEXT:    add %i0, 15, %i0
-; CHECK64-NEXT:    sethi 4194303, %i1
-; CHECK64-NEXT:    or %i1, 1008, %i1
-; CHECK64-NEXT:    sethi 0, %i2
-; CHECK64-NEXT:    or %i2, 1, %i2
-; CHECK64-NEXT:    sllx %i2, 32, %i2
-; CHECK64-NEXT:    or %i2, %i1, %i1
-; CHECK64-NEXT:    and %i0, %i1, %i0
 ; CHECK64-NEXT:    sub %sp, %i0, %i0
 ; CHECK64-NEXT:    add %i0, 2175, %i0
 ; CHECK64-NEXT:    and %i0, -64, %o1
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index 18bb052b47d97..fffec42c2bbb9 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %
 ; CHECK32:       ! %bb.0: ! %entry
 ; CHECK32-NEXT:    save %sp, -96, %sp
 ; CHECK32-NEXT:    ld [%fp+92], %o0
-; CHECK32-NEXT:    add %sp, 80, %i0
+; CHECK32-NEXT:    add %sp, 84, %i0
 ; CHECK32-NEXT:    and %i0, -64, %o1
 ; CHECK32-NEXT:    call stack_realign_helper
 ; CHECK32-NEXT:    add %o1, -96, %sp
@@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/132064


More information about the llvm-commits mailing list