[clang] Revert "[clang] Avoid memcopy for small structure with padding under … (PR #84230)
Antonio Frighetto via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 6 14:52:01 PST 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/84230
>From 91ca7b2e5c98a7caa8a97f05f57e84f68d861fa3 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 6 Mar 2024 23:49:40 +0100
Subject: [PATCH] [clang][CodeGen] Allow memcpy replace with trivial auto var
init
Fixes: https://github.com/llvm/llvm-project/issues/84178.
---
clang/lib/CodeGen/CGDecl.cpp | 43 ++++++++++++++---------
clang/test/CodeGen/aapcs-align.cpp | 4 +--
clang/test/CodeGen/aapcs64-align.cpp | 8 ++---
clang/test/CodeGen/attr-counted-by.c | 26 ++++----------
clang/test/CodeGenCXX/auto-var-init.cpp | 6 ++--
clang/test/CodeGenOpenCL/amdgpu-printf.cl | 9 +----
clang/test/OpenMP/bug54082.c | 4 +--
7 files changed, 43 insertions(+), 57 deletions(-)
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..aa9997b87ecfa7 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1241,27 +1241,38 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
return;
}
- // If the initializer is small, use a handful of stores.
+ // If the initializer is small or trivialAutoVarInit is set, use a handful of
+ // stores.
+ bool IsTrivialAutoVarInitPattern =
+ CGM.getContext().getLangOpts().getTrivialAutoVarInit() ==
+ LangOptions::TrivialAutoVarInitKind::Pattern;
if (shouldSplitConstantStore(CGM, ConstantSize)) {
if (auto *STy = dyn_cast<llvm::StructType>(Ty)) {
- const llvm::StructLayout *Layout =
- CGM.getDataLayout().getStructLayout(STy);
- for (unsigned i = 0; i != constant->getNumOperands(); i++) {
- CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i));
- Address EltPtr = Builder.CreateConstInBoundsByteGEP(
- Loc.withElementType(CGM.Int8Ty), CurOff);
- emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
- constant->getAggregateElement(i), IsAutoInit);
+ if (STy == Loc.getElementType() ||
+ (STy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
+ const llvm::StructLayout *Layout =
+ CGM.getDataLayout().getStructLayout(STy);
+ for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+ CharUnits CurOff =
+ CharUnits::fromQuantity(Layout->getElementOffset(i));
+ Address EltPtr = Builder.CreateConstInBoundsByteGEP(
+ Loc.withElementType(CGM.Int8Ty), CurOff);
+ emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+ constant->getAggregateElement(i), IsAutoInit);
+ }
+ return;
}
- return;
} else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) {
- for (unsigned i = 0; i != ATy->getNumElements(); i++) {
- Address EltPtr = Builder.CreateConstGEP(
- Loc.withElementType(ATy->getElementType()), i);
- emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
- constant->getAggregateElement(i), IsAutoInit);
+ if (ATy == Loc.getElementType() ||
+ (ATy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
+ for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+ Address EltPtr = Builder.CreateConstGEP(
+ Loc.withElementType(ATy->getElementType()), i);
+ emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+ constant->getAggregateElement(i), IsAutoInit);
+ }
+ return;
}
- return;
}
}
diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index 2886a32974b066..4f393d9e6b7f32 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -134,8 +134,8 @@ void g6() {
f6m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
// CHECK: declare void @f6(i32 noundef, [4 x i32])
// CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32])
}
diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp
index 759413cbc4b56f..de231f2123b975 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -75,8 +75,8 @@ void g4() {
f4m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g4()
-// CHECK: call void @f4(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f4(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
// CHECK: declare void @f4(i32 noundef, [2 x i64])
// CHECK: declare void @f4m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
@@ -95,8 +95,8 @@ void f5m(int, int, int, int, int, P16);
f5m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g5()
-// CHECK: call void @f5(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f5(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
// CHECK: declare void @f5(i32 noundef, [2 x i64])
// CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index e5685e39173b0f..1fb39f9a346667 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -1314,17 +1314,10 @@ int test14(int idx) {
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15(
// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR4]] {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
-// NO-SANITIZE-WITH-ATTR-NEXT: [[FOO:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[FOO]]) #[[ATTR12]]
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 1, ptr [[FOO]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4
-// NO-SANITIZE-WITH-ATTR-NEXT: store i32 2, ptr [[TMP0]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[BLAH]], i64 0, i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[FOO]]) #[[ATTR12]]
-// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr getelementptr inbounds ([[STRUCT_ANON_8:%.*]], ptr @__const.test15.foo, i64 1, i32 0), i64 0, i64 [[IDXPROM]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15(
// SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
@@ -1342,17 +1335,10 @@ int test14(int idx) {
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15(
// NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[FOO:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[FOO]]) #[[ATTR9]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 1, ptr [[FOO]], align 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 2, ptr [[TMP0]], align 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[BLAH]], i64 0, i64 [[IDXPROM]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[FOO]]) #[[ATTR9]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP1]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr getelementptr inbounds ([[STRUCT_ANON_8:%.*]], ptr @__const.test15.foo, i64 1, i32 0), i64 0, i64 [[IDXPROM]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]]
//
int test15(int idx) {
struct {
diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp
index e5a9d015f22f27..68fb2de0c2b452 100644
--- a/clang/test/CodeGenCXX/auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/auto-var-init.cpp
@@ -1504,9 +1504,7 @@ TEST_CUSTOM(unmatchedreverse, unmatchedreverse, { .c = 42 });
// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}}
// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 3
// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}}
-// ZERO-O1: store i8 42, ptr {{.*}}, align 4
-// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1
-// ZERO-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 0, i64 3, {{.*}})
+// ZERO-O1: store i32 42, ptr {{.*}}, align 4
TEST_UNINIT(unmatchedfp, unmatchedfp);
// CHECK-LABEL: @test_unmatchedfp_uninit()
@@ -1531,7 +1529,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 });
// CHECK-O0: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
-// CHECK-O1: store double 0x400921FB54442D18, ptr %custom, align 8
+// CHECK-O1: store i64 4614256656552045848, ptr %custom, align 8
// CHECK-NOT: !annotation
TEST_UNINIT(emptyenum, emptyenum);
diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
index 6c84485b66b4a0..edf6dbf8657cbe 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
@@ -30,14 +30,7 @@ __kernel void test_printf_int(int i) {
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: [[LOC0:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 0
-// CHECK-NEXT: store i8 102, ptr addrspace(5) [[LOC0]], align 1
-// CHECK-NEXT: [[LOC1:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 1
-// CHECK-NEXT: store i8 111, ptr addrspace(5) [[LOC1]], align 1
-// CHECK-NEXT: [[LOC2:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 2
-// CHECK-NEXT: store i8 111, ptr addrspace(5) [[LOC2]], align 1
-// CHECK-NEXT: [[LOC3:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 3
-// CHECK-NEXT: store i8 0, ptr addrspace(5) [[LOC3]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]]
diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c
index b88b68fd43012a..337c120983e0a3 100644
--- a/clang/test/OpenMP/bug54082.c
+++ b/clang/test/OpenMP/bug54082.c
@@ -69,9 +69,7 @@ void foo() {
// CHECK-NEXT: [[X_TRAITS:%.*]] = alloca [1 x %struct.omp_alloctrait_t], align 16
// CHECK-NEXT: [[X_ALLOC:%.*]] = alloca i64, align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[X_TRAITS]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: store i32 2, ptr [[X_TRAITS]], align 16
-// CHECK-NEXT: [[LOC0:%.*]] = getelementptr inbounds i8, ptr [[X_TRAITS]], i64 8
-// CHECK-NEXT: store i64 64, ptr [[LOC0]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false)
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[X_ALLOC]]) #[[ATTR5]]
// CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull [[X_TRAITS]]) #[[ATTR5]]
// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3:![0-9]+]]
More information about the cfe-commits
mailing list