[llvm] [Inliner] Fix Issue #45778: Inliner now respects the alignment of parameters passed by value (PR #137455)

via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 26 09:27:30 PDT 2025


https://github.com/sallto updated https://github.com/llvm/llvm-project/pull/137455

>From a20620a96673bfd8c228b17b8e24b00d52f6025a Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Sat, 26 Apr 2025 13:22:57 +0200
Subject: [PATCH] [Inliner] Fix Issue #45778: Inliner now respects the
 alignment of parameters passed by value

---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 27 +++++++++++--------
 llvm/test/Transforms/Inline/byval-align.ll    |  2 +-
 .../Inline/byval-with-non-alloca-addrspace.ll |  2 +-
 llvm/test/Transforms/Inline/byval.ll          |  6 ++---
 .../Inline/inline-deferred-instsimplify.ll    |  2 +-
 llvm/test/Transforms/Inline/inline-tail.ll    |  4 +--
 6 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 295518fef687d..7a91620af8272 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1703,7 +1703,8 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
 }
 
 static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
-                                    Module *M, BasicBlock *InsertBlock,
+                                    MaybeAlign SrcAlign, Module *M,
+                                    BasicBlock *InsertBlock,
                                     InlineFunctionInfo &IFI,
                                     Function *CalledFunc) {
   IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
@@ -1711,11 +1712,10 @@ static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
   Value *Size =
       Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
 
-  // Always generate a memcpy of alignment 1 here because we don't know
-  // the alignment of the src pointer.  Other optimizations can infer
-  // better alignment.
-  CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
-                                      /*SrcAlign*/ Align(1), Size);
+  Align DstAlign = Dst->getPointerAlignment(M->getDataLayout());
+
+  // Generate a memcpy with the correct alignments.
+  CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
 
   // The verifier requires that all calls of debug-info-bearing functions
   // from debug-info-bearing functions have a debug location (for inlining
@@ -2629,9 +2629,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
     struct ByValInit {
       Value *Dst;
       Value *Src;
+      MaybeAlign SrcAlign;
       Type *Ty;
     };
-    // Keep a list of pair (dst, src) to emit byval initializations.
+    // Keep a list of tuples (dst, src, src_align) to emit byval
+    // initializations. Src Alignment is only available through the callbase,
+    // and therefore has to be saved.
     SmallVector<ByValInit, 4> ByValInits;
 
     // When inlining a function that contains noalias scope metadata,
@@ -2661,8 +2664,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
                                         &CB, CalledFunc, IFI,
                                         CalledFunc->getParamAlign(ArgNo));
         if (ActualArg != *AI)
-          ByValInits.push_back(
-              {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
+          ByValInits.push_back({ActualArg, (Value *)*AI,
+                                CB.getParamAlign(ArgNo),
+                                CB.getParamByValType(ArgNo)});
       }
 
       VMap[&*I] = ActualArg;
@@ -2712,8 +2716,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
     // Inject byval arguments initialization.
     for (ByValInit &Init : ByValInits)
-      HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
-                              &*FirstNewBlock, IFI, CalledFunc);
+      HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Init.SrcAlign,
+                              Caller->getParent(), &*FirstNewBlock, IFI,
+                              CalledFunc);
 
     std::optional<OperandBundleUse> ParentDeopt =
         CB.getOperandBundle(LLVMContext::OB_deopt);
diff --git a/llvm/test/Transforms/Inline/byval-align.ll b/llvm/test/Transforms/Inline/byval-align.ll
index 766094f05be0c..0b135aa570a72 100644
--- a/llvm/test/Transforms/Inline/byval-align.ll
+++ b/llvm/test/Transforms/Inline/byval-align.ll
@@ -29,7 +29,7 @@ define void @byval_caller(ptr nocapture align 64 %a, ptr %b) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A1:%.*]] = alloca float, align 128
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[A1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[A1]], ptr align 1 [[A]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[A1]], ptr align 128 [[A]], i64 4, i1 false)
 ; CHECK-NEXT:    [[LOAD_I:%.*]] = load float, ptr [[A1]], align 4
 ; CHECK-NEXT:    [[B_IDX_I:%.*]] = getelementptr inbounds float, ptr [[B]], i64 8
 ; CHECK-NEXT:    [[ADD_I:%.*]] = fadd float [[LOAD_I]], 2.000000e+00
diff --git a/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll b/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
index 42ec0f2bf5699..1d1cb459d53b6 100644
--- a/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
+++ b/llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll
@@ -27,7 +27,7 @@ define i64 @foo(ptr %arg) {
 ; CHECK-SAME: ptr [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ARG1:%.*]] = alloca [[STRUCT:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[ARG1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ARG1]], ptr align 1 [[ARG]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ARG1]], ptr align 8 [[ARG]], i64 16, i1 false)
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT]], ptr [[ARG1]], i64 0, i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[ARG1]])
diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll
index d98f6e8efa05d..b4a19c55c20a0 100644
--- a/llvm/test/Transforms/Inline/byval.ll
+++ b/llvm/test/Transforms/Inline/byval.ll
@@ -36,7 +36,7 @@ define i32 @test1() nounwind  {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[S1]], ptr [[S]], i64 12, i1 false)
 ; CHECK-NEXT:    [[TMP1_I:%.*]] = load i32, ptr [[S1]], align 4
 ; CHECK-NEXT:    [[TMP2_I:%.*]] = add i32 [[TMP1_I]], 1
 ; CHECK-NEXT:    store i32 [[TMP2_I]], ptr [[S1]], align 4
@@ -105,7 +105,7 @@ define void @test3() nounwind  {
 ; CHECK-NEXT:    [[S1:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
 ; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS]], align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 64 [[S1]], ptr align 64 [[S]], i64 12, i1 false)
 ; CHECK-NEXT:    call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
 ; CHECK-NEXT:    ret void
@@ -158,7 +158,7 @@ define i32 @test5() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_S0:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[B]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[B]], ptr align 1 @b, i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[B]], ptr align 4 @b, i64 4, i1 false)
 ; CHECK-NEXT:    store i32 0, ptr @b, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr @a, align 4
diff --git a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
index f02d03688f039..c74351b300399 100644
--- a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
+++ b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
@@ -53,7 +53,7 @@ define i32 @main() {
 ; CHECK-LABEL: define i32 @main() {
 ; CHECK-NEXT:    [[G_VAR:%.*]] = alloca [[STRUCT_A:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 20, ptr [[G_VAR]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[G_VAR]], ptr align 1 @g_var, i64 20, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[G_VAR]], ptr align 8 @g_var, i64 20, i1 false)
 ; CHECK-NEXT:    [[VAL_I:%.*]] = load i32, ptr [[G_VAR]], align 8
 ; CHECK-NEXT:    [[DOTNOT_I:%.*]] = icmp eq i32 [[VAL_I]], 0
 ; CHECK-NEXT:    br i1 [[DOTNOT_I]], label [[CHECK_POINTERS_ARE_EQUAL_I:%.*]], label [[STORE_PTR_IN_GVAR_I:%.*]]
diff --git a/llvm/test/Transforms/Inline/inline-tail.ll b/llvm/test/Transforms/Inline/inline-tail.ll
index cbee89c87f17b..0bfd0565eef57 100644
--- a/llvm/test/Transforms/Inline/inline-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-tail.ll
@@ -65,7 +65,7 @@ define void @test_byval_a(ptr byval(i32) %p) {
 ; CHECK-SAME: (ptr byval(i32) [[P:%.*]]) {
 ; CHECK-NEXT:    [[P1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
 ; CHECK-NEXT:    musttail call void @test_byval_c(ptr byval(i32) [[P1]])
 ; CHECK-NEXT:    ret void
 ;
@@ -88,7 +88,7 @@ define void @test_dynalloca_a(ptr byval(i32) %p, i32 %n) {
 ; CHECK-NEXT:    [[P1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
 ; CHECK-NEXT:    [[BUF_I:%.*]] = alloca i8, i32 [[N]], align 1
 ; CHECK-NEXT:    call void @escape(ptr [[BUF_I]])
 ; CHECK-NEXT:    musttail call void @test_dynalloca_c(ptr byval(i32) [[P1]], i32 [[N]])



More information about the llvm-commits mailing list