[PATCH] D25175: [MemCpyOpt] Optimize memcpy-memcpy dependencies more aggressively.
bryant via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 2 23:02:10 PDT 2016
bryant created this revision.
bryant added reviewers: efriedma, majnemer, rnk, aaron.ballman, eli.friedman.
bryant added a subscriber: llvm-commits.
bryant set the repository for this revision to rL LLVM.
Currently, memcpy-memcpy pairs are only considered when there are no mods or
refs of either the source or dest memory operands of the examined memcpy:
ir
memcpy(b <- a) ; the "dependee" memcpy
... ; no mod/ref of a, b, or c in between
memcpy(c <- b) ; the examined memcpy
In the above, if b and/or c are mod/refed in the space between the two
memcopies, then the mod/ref-ing instruction closest to the examined memcpy is
matched and the dependee is never seen. If on the other hand only a is
mod/refed in between, then the memcpy pair is recognized but ultimately ignored
because the `processMemCpyMemCpyDependence` transformation would be invalid:
ir
memcpy(b <- a); *a = 42; memcpy(c <- b)
=>
memcpy(b <- a); *a = 42; memcpy(c <- a)
What this patch does is search harder for memcpy pairs and then match and
transform them against three general cases:
Case 1:
ir
memcpy(b <- a); ...; *b = 42; ...; memcpy(a <- b);
=> if a is never mod/refed in between the two memcpys
...; *a = 42; ...; memcpy(b <- a);
Case 2 (essentially the todo mentioned in `processMemCpyMemCpyDependence`):
ir
memcpy(b <- a); ...; memcpy(c <- b);
=> if "..." doesn't mod/ref either c or b
memcpy(c <- a); memcpy(b <- a); ...;
Case 3:
ir
memcpy(b <- a); ...; memcpy(c <- b)
=> if "..." doesn't mod/ref b or a
...; memcpy(b <- a); memcpy(c <- b)
**Feedback on the soundness of these three cases is eagerly sought.**
At this time, only case 2 has been implemented because it's the easiest and
most useful. For instance:
c
typedef struct { unsigned char large[65536]; } S;
extern void g_(S *);
S p1(unsigned g) {
S rv = {0};
if (g) {
S rv2;
g_(&rv2);
return rv2;
}
rv.large[g] = g + 1;
return rv;
}
S p0() {
S k = p1(32);
k.large[445] = 2302;
return k;
}
S set(S x, unsigned n) {
x.large[n] = n;
return x;
}
S p() {
S k = p0();
k = set(k, 99);
k.large[22] += 23;
return k;
}
produces, at -O3 (without the patch; extraneous memcopies marked):
ir
define void @p(%struct.S* noalias nocapture sret) local_unnamed_addr #0 {
%2 = alloca %struct.S, align 1
%3 = alloca [22 x i8], align 8
%4 = alloca [76 x i8], align 1
%5 = alloca [345 x i8], align 1
%6 = alloca [65090 x i8], align 2
%7 = getelementptr inbounds [22 x i8], [22 x i8]* %3, i64 0, i64 0
call void @llvm.lifetime.start(i64 22, i8* %7)
%8 = getelementptr inbounds [76 x i8], [76 x i8]* %4, i64 0, i64 0
call void @llvm.lifetime.start(i64 76, i8* %8)
%9 = getelementptr inbounds [345 x i8], [345 x i8]* %5, i64 0, i64 0
call void @llvm.lifetime.start(i64 345, i8* %9)
%10 = getelementptr inbounds [65090 x i8], [65090 x i8]* %6, i64 0, i64 0
call void @llvm.lifetime.start(i64 65090, i8* %10)
%11 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 0
call void @llvm.lifetime.start(i64 65536, i8* %11) #3, !noalias !8
call void @g_(%struct.S* nonnull %2) #3, !noalias !8
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %11, i64 22, i32 1, i1 false) <===
%12 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 22
%13 = load i8, i8* %12, align 1
%14 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 23
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %8, i8* %14, i64 76, i32 1, i1 false) <===
%15 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 100
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %9, i8* %15, i64 345, i32 1, i1 false) <===
%16 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 446
%17 = getelementptr inbounds [65090 x i8], [65090 x i8]* %6, i64 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %17, i8* %16, i64 65090, i32 1, i1 false) #3 <===
call void @llvm.lifetime.end(i64 65536, i8* %11) #3, !noalias !8
%18 = add i8 %13, 23
%19 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %19, i8* %7, i64 22, i32 1, i1 false)
%20 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 22
store i8 %18, i8* %20, align 1
%21 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 23
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %21, i8* %8, i64 76, i32 1, i1 false)
%22 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 99
store i8 99, i8* %22, align 1
%23 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 100
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %23, i8* %9, i64 345, i32 1, i1 false)
%24 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 445
store i8 -2, i8* %24, align 1
%25 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 446
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %25, i8* %10, i64 65090, i32 1, i1 false)
call void @llvm.lifetime.end(i64 22, i8* %7)
call void @llvm.lifetime.end(i64 76, i8* %8)
call void @llvm.lifetime.end(i64 345, i8* %9)
call void @llvm.lifetime.end(i64 65090, i8* %10)
ret void
}
With this patch, the highlighted memcopies are properly seen, transformed, and
later removed by DSE.
Repository:
rL LLVM
https://reviews.llvm.org/D25175
Files:
lib/Transforms/Scalar/MemCpyOptimizer.cpp
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25175.73235.patch
Type: text/x-patch
Size: 4794 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161003/7f6d83ba/attachment.bin>
More information about the llvm-commits
mailing list