[PATCH] D25175: [MemCpyOpt] Optimize memcpy-memcpy dependencies more aggressively.

bryant via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 2 23:02:10 PDT 2016


bryant created this revision.
bryant added reviewers: efriedma, majnemer, rnk, aaron.ballman, eli.friedman.
bryant added a subscriber: llvm-commits.
bryant set the repository for this revision to rL LLVM.

Currently, memcpy-memcpy pairs are only considered when there are no mods or
refs of either the source or dest memory operands of the examined memcpy:

  ir
  memcpy(b <- a)  ; the "dependee" memcpy
  ...  ; no mod/ref of a, b, or c in between
  memcpy(c <- b)  ; the examined memcpy

In the above, if b and/or c are mod/refed in the space between the two
memcpys, then the mod/ref-ing instruction closest to the examined memcpy is
matched and the dependee is never seen. If, on the other hand, only a is
mod/refed in between, then the memcpy pair is recognized but ultimately ignored
because the `processMemCpyMemCpyDependence` transformation would be invalid:

  ir
  memcpy(b <- a); *a = 42; memcpy(c <- b)
      =>
  memcpy(b <- a); *a = 42; memcpy(c <- a)

What this patch does is search harder for memcpy pairs and then match and
transform them against three general cases:

Case 1:

  ir
  memcpy(b <- a); ...; *b = 42; ...; memcpy(a <- b);
      => if a is never mod/refed in between the two memcpys
  ...; *a = 42; ...; memcpy(b <- a);

Case 2 (essentially the todo mentioned in `processMemCpyMemCpyDependence`):

  ir
  memcpy(b <- a); ...;  memcpy(c <- b);
      => if "..." doesn't mod/ref either c or b
  memcpy(c <- a); memcpy(b <- a); ...;

Case 3:

  ir
  memcpy(b <- a); ...; memcpy(c <- b)
      => if "..." doesn't mod/ref b or a
  ...; memcpy(b <- a); memcpy(c <- b)

**Feedback on the soundness of these three cases is eagerly sought.**

At this time, only case 2 has been implemented because it's the easiest and
most useful.  For instance:

  c
  typedef struct { unsigned char large[65536]; } S;
  
  extern void g_(S *);
  
  S p1(unsigned g) {
    S rv = {0};
    if (g) {
      S rv2;
      g_(&rv2);
      return rv2;
    }
    rv.large[g] = g + 1;
    return rv;
  }
  
  S p0() {
    S k = p1(32);
    k.large[445] = 2302;
    return k;
  }
  
  S set(S x, unsigned n) {
    x.large[n] = n;
    return x;
  }
  
  S p() {
    S k = p0();
    k = set(k, 99);
    k.large[22] += 23;
    return k;
  }

produces, at -O3 (without the patch; extraneous memcopies marked):

  ir
  define void @p(%struct.S* noalias nocapture sret) local_unnamed_addr #0 {
    %2 = alloca %struct.S, align 1
    %3 = alloca [22 x i8], align 8
    %4 = alloca [76 x i8], align 1
    %5 = alloca [345 x i8], align 1
    %6 = alloca [65090 x i8], align 2
    %7 = getelementptr inbounds [22 x i8], [22 x i8]* %3, i64 0, i64 0
    call void @llvm.lifetime.start(i64 22, i8* %7)
    %8 = getelementptr inbounds [76 x i8], [76 x i8]* %4, i64 0, i64 0
    call void @llvm.lifetime.start(i64 76, i8* %8)
    %9 = getelementptr inbounds [345 x i8], [345 x i8]* %5, i64 0, i64 0
    call void @llvm.lifetime.start(i64 345, i8* %9)
    %10 = getelementptr inbounds [65090 x i8], [65090 x i8]* %6, i64 0, i64 0
    call void @llvm.lifetime.start(i64 65090, i8* %10)
    %11 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 0
    call void @llvm.lifetime.start(i64 65536, i8* %11) #3, !noalias !8
    call void @g_(%struct.S* nonnull %2) #3, !noalias !8
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %11, i64 22, i32 1, i1 false)            <===
    %12 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 22
    %13 = load i8, i8* %12, align 1
    %14 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 23
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %8, i8* %14, i64 76, i32 1, i1 false)            <===
    %15 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 100
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %9, i8* %15, i64 345, i32 1, i1 false)           <===
    %16 = getelementptr inbounds %struct.S, %struct.S* %2, i64 0, i32 0, i64 446
    %17 = getelementptr inbounds [65090 x i8], [65090 x i8]* %6, i64 0, i64 0
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %17, i8* %16, i64 65090, i32 1, i1 false) #3     <===
    call void @llvm.lifetime.end(i64 65536, i8* %11) #3, !noalias !8
    %18 = add i8 %13, 23
    %19 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 0
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %19, i8* %7, i64 22, i32 1, i1 false)
    %20 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 22
    store i8 %18, i8* %20, align 1
    %21 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 23
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %21, i8* %8, i64 76, i32 1, i1 false)
    %22 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 99
    store i8 99, i8* %22, align 1
    %23 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 100
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %23, i8* %9, i64 345, i32 1, i1 false)
    %24 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 445
    store i8 -2, i8* %24, align 1
    %25 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0, i64 446
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %25, i8* %10, i64 65090, i32 1, i1 false)
    call void @llvm.lifetime.end(i64 22, i8* %7)
    call void @llvm.lifetime.end(i64 76, i8* %8)
    call void @llvm.lifetime.end(i64 345, i8* %9)
    call void @llvm.lifetime.end(i64 65090, i8* %10)
    ret void
  }

With this patch, the highlighted memcpys are properly seen, transformed, and
later removed by DSE.


Repository:
  rL LLVM

https://reviews.llvm.org/D25175

Files:
  lib/Transforms/Scalar/MemCpyOptimizer.cpp

-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25175.73235.patch
Type: text/x-patch
Size: 4794 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161003/7f6d83ba/attachment.bin>


More information about the llvm-commits mailing list