[PATCH] D132233: [CGP][ARM] Don't align memcpy args when optimizing for size
Dave Green via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 19 07:38:16 PDT 2022
dmgreen created this revision.
dmgreen added reviewers: samtebbs, john.brawn, SjoerdMeijer.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
dmgreen requested review of this revision.
Herald added a project: LLVM.
This was added back in D7908 <https://reviews.llvm.org/D7908> to align memcpy args. It should be skipped when optimizing for size, to prevent unnecessary padding from being added. It currently seems to be used only on ARM.
https://reviews.llvm.org/D132233
Files:
llvm/lib/CodeGen/CodeGenPrepare.cpp
llvm/test/CodeGen/ARM/memcpy-no-inline.ll
llvm/test/CodeGen/ARM/memfunc.ll
Index: llvm/test/CodeGen/ARM/memfunc.ll
===================================================================
--- llvm/test/CodeGen/ARM/memfunc.ll
+++ llvm/test/CodeGen/ARM/memfunc.ll
@@ -377,6 +377,45 @@
ret void
}
+; Check that alloca arguments are not aligned when the function is minsize
+define void @fminsize(i8* %dest, i32 %n) "frame-pointer"="all" minsize {
+entry:
+ ; CHECK-LABEL: fminsize
+
+ ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #17}}
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
+ %arr0 = alloca [9 x i8], align 1
+ %0 = bitcast [9 x i8]* %arr0 to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #26}}
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
+ %arr1 = alloca [9 x i8], align 1
+ %1 = bitcast [9 x i8]* %arr1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #35}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: bl _memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
+ %arr2 = alloca [9 x i8], align 1
+ %2 = bitcast [9 x i8]* %arr2 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)
+
+ ret void
+}
+
; Check that global variables are aligned if they are large enough, but only if
; they are defined in this object and don't have an explicit section.
@arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1
Index: llvm/test/CodeGen/ARM/memcpy-no-inline.ll
===================================================================
--- llvm/test/CodeGen/ARM/memcpy-no-inline.ll
+++ llvm/test/CodeGen/ARM/memcpy-no-inline.ll
@@ -4,6 +4,7 @@
@.str = private unnamed_addr constant [31 x i8] c"012345678901234567890123456789\00", align 1
@.str.1 = private unnamed_addr constant [21 x i8] c"01234567890123456789\00", align 1
+@.str.2 = private unnamed_addr constant [21 x i8] c"01234567890123456789\00", align 4
@myglobal = common global %struct.mystruct zeroinitializer, align 1
@@ -21,13 +22,23 @@
define void @bar() #0 {
entry:
; CHECK-LABEL: bar:
-; CHECK-NOT: __aeabi_memcpy
+; CHECK: __aeabi_memcpy
%mystring = alloca [31 x i8], align 1
%0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i1 false)
ret void
}
+define void @bar2() #0 {
+entry:
+; CHECK-LABEL: bar2:
+; CHECK-NOT: __aeabi_memcpy
+ %mystring = alloca [32 x i8], align 4
+ %0 = getelementptr inbounds [32 x i8], [32 x i8]* %mystring, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.2, i32 0, i32 0), i32 21, i1 false)
+ ret void
+}
+
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
attributes #0 = { minsize noinline nounwind optsize }
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2127,7 +2127,7 @@
// idea
unsigned MinSize;
Align PrefAlign;
- if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ if (!OptSize && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D132233.453994.patch
Type: text/x-patch
Size: 3883 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220819/79d17402/attachment.bin>
More information about the llvm-commits
mailing list