[llvm] [Inliner] Fix Issue #45778: Inliner now respects the alignment of parameters passed by value (PR #137455)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 26 09:27:30 PDT 2025
https://github.com/sallto updated https://github.com/llvm/llvm-project/pull/137455
>From a20620a96673bfd8c228b17b8e24b00d52f6025a Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Sat, 26 Apr 2025 13:22:57 +0200
Subject: [PATCH] [Inliner] Fix Issue #45778: Inliner now respects the
alignment of parameters passed by value
---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 27 +++++++++++--------
llvm/test/Transforms/Inline/byval-align.ll | 2 +-
.../Inline/byval-with-non-alloca-addrspace.ll | 2 +-
llvm/test/Transforms/Inline/byval.ll | 6 ++---
.../Inline/inline-deferred-instsimplify.ll | 2 +-
llvm/test/Transforms/Inline/inline-tail.ll | 4 +--
6 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 295518fef687d..7a91620af8272 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1703,7 +1703,8 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
}
static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
- Module *M, BasicBlock *InsertBlock,
+ MaybeAlign SrcAlign, Module *M,
+ BasicBlock *InsertBlock,
InlineFunctionInfo &IFI,
Function *CalledFunc) {
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
@@ -1711,11 +1712,10 @@ static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
Value *Size =
Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
- /*SrcAlign*/ Align(1), Size);
+ Align DstAlign = Dst->getPointerAlignment(M->getDataLayout());
+
+ // Generate a memcpy with the correct alignments.
+ CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
// The verifier requires that all calls of debug-info-bearing functions
// from debug-info-bearing functions have a debug location (for inlining
@@ -2629,9 +2629,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
struct ByValInit {
Value *Dst;
Value *Src;
+ MaybeAlign SrcAlign;
Type *Ty;
};
- // Keep a list of pair (dst, src) to emit byval initializations.
+ // Keep a list of tuples (dst, src, src_align) to emit byval
+ // initializations. Src Alignment is only available though the callbase,
+ // therefore has to be saved.
SmallVector<ByValInit, 4> ByValInits;
// When inlining a function that contains noalias scope metadata,
@@ -2661,8 +2664,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
&CB, CalledFunc, IFI,
CalledFunc->getParamAlign(ArgNo));
if (ActualArg != *AI)
- ByValInits.push_back(
- {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
+ ByValInits.push_back({ActualArg, (Value *)*AI,
+ CB.getParamAlign(ArgNo),
+ CB.getParamByValType(ArgNo)});
}
VMap[&*I] = ActualArg;
@@ -2712,8 +2716,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Inject byval arguments initialization.
for (ByValInit &Init : ByValInits)
- HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
- &*FirstNewBlock, IFI, CalledFunc);
+ HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Init.SrcAlign,
+ Caller->getParent(), &*FirstNewBlock, IFI,
+ CalledFunc);
std::optional<OperandBundleUse> ParentDeopt =
CB.getOperandBundle(LLVMContext::OB_deopt);
diff --git a/llvm/test/Transforms/Inline/byval-align.ll b/llvm/test/Transforms/Inline/byval-align.ll
index 766094f05be0c..0b135aa570a72 100644
--- a/llvm/test/Transforms/Inline/byval-align.ll
+++ b/llvm/test/Transforms/Inline/byval-align.ll
@@ -29,7 +29,7 @@ define void @byval_caller(ptr nocapture align 64 %a, ptr %b) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = alloca float, align 128
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[A1]], ptr align 1 [[A]], i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[A1]], ptr align 128 [[A]], i64 4, i1 false)
; CHECK-NEXT: [[LOAD_I:%.*]] = load float, ptr [[A1]], align 4
; CHECK-NEXT: [[B_IDX_I:%.*]] = getelementptr inbounds float, ptr [[B]], i64 8
; CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[LOAD_I]], 2.000000e+00
diff --git a/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll b/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
index 42ec0f2bf5699..1d1cb459d53b6 100644
--- a/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
+++ b/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
@@ -27,7 +27,7 @@ define i64 @foo(ptr %arg) {
; CHECK-SAME: ptr [[ARG:%.*]]) {
; CHECK-NEXT: [[ARG1:%.*]] = alloca [[STRUCT:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[ARG1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ARG1]], ptr align 1 [[ARG]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ARG1]], ptr align 8 [[ARG]], i64 16, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT]], ptr [[ARG1]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[ARG1]])
diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll
index d98f6e8efa05d..b4a19c55c20a0 100644
--- a/llvm/test/Transforms/Inline/byval.ll
+++ b/llvm/test/Transforms/Inline/byval.ll
@@ -36,7 +36,7 @@ define i32 @test1() nounwind {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, ptr [[TMP4]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[S1]], ptr [[S]], i64 12, i1 false)
; CHECK-NEXT: [[TMP1_I:%.*]] = load i32, ptr [[S1]], align 4
; CHECK-NEXT: [[TMP2_I:%.*]] = add i32 [[TMP1_I]], 1
; CHECK-NEXT: store i32 [[TMP2_I]], ptr [[S1]], align 4
@@ -105,7 +105,7 @@ define void @test3() nounwind {
; CHECK-NEXT: [[S1:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 64 [[S1]], ptr align 64 [[S]], i64 12, i1 false)
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
; CHECK-NEXT: ret void
@@ -158,7 +158,7 @@ define i32 @test5() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_S0:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[B]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[B]], ptr align 1 @b, i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[B]], ptr align 4 @b, i64 4, i1 false)
; CHECK-NEXT: store i32 0, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr @a, align 4
diff --git a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
index f02d03688f039..c74351b300399 100644
--- a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
+++ b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
@@ -53,7 +53,7 @@ define i32 @main() {
; CHECK-LABEL: define i32 @main() {
; CHECK-NEXT: [[G_VAR:%.*]] = alloca [[STRUCT_A:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[G_VAR]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[G_VAR]], ptr align 1 @g_var, i64 20, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[G_VAR]], ptr align 8 @g_var, i64 20, i1 false)
; CHECK-NEXT: [[VAL_I:%.*]] = load i32, ptr [[G_VAR]], align 8
; CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[VAL_I]], 0
; CHECK-NEXT: br i1 [[DOTNOT_I]], label [[CHECK_POINTERS_ARE_EQUAL_I:%.*]], label [[STORE_PTR_IN_GVAR_I:%.*]]
diff --git a/llvm/test/Transforms/Inline/inline-tail.ll b/llvm/test/Transforms/Inline/inline-tail.ll
index cbee89c87f17b..0bfd0565eef57 100644
--- a/llvm/test/Transforms/Inline/inline-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-tail.ll
@@ -65,7 +65,7 @@ define void @test_byval_a(ptr byval(i32) %p) {
; CHECK-SAME: (ptr byval(i32) [[P:%.*]]) {
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
; CHECK-NEXT: musttail call void @test_byval_c(ptr byval(i32) [[P1]])
; CHECK-NEXT: ret void
;
@@ -88,7 +88,7 @@ define void @test_dynalloca_a(ptr byval(i32) %p, i32 %n) {
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
; CHECK-NEXT: [[BUF_I:%.*]] = alloca i8, i32 [[N]], align 1
; CHECK-NEXT: call void @escape(ptr [[BUF_I]])
; CHECK-NEXT: musttail call void @test_dynalloca_c(ptr byval(i32) [[P1]], i32 [[N]])
More information about the llvm-commits
mailing list