[PATCH] D97667: [loop-idiom] Hoist loop memcpys to loop preheader

Han Zhu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 1 01:21:25 PST 2021


zhuhan0 created this revision.
zhuhan0 added reviewers: zino, wenlei, hoyFB, reames, chandlerc, lattner.
Herald added subscribers: hoy, jfb, hiraditya.
zhuhan0 requested review of this revision.
Herald added a project: LLVM.

For a simple loop like:

  struct S {
    int x;
    int y;
    char b;
  };
  
  unsigned foo(S* __restrict__ a, S* b, int n) {
    for (int i = 0; i < n; i++)
      a[i] = b[i];
  
    return sizeof(a[0]);
  }

We could eliminate the loop and convert it to a single memcpy of 12*n bytes. Currently this is not handled. Output of `opt -loop-idiom -S < memcpy_before.ll`:

  %struct.S = type { i32, i32, i8 }
  
  define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr {
  entry:
    %cmp7 = icmp sgt i32 %n, 0
    br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
  
  for.body.preheader:                               ; preds = %entry
    br label %for.body
  
  for.cond.cleanup.loopexit:                        ; preds = %for.body
    br label %for.cond.cleanup
  
  for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
    ret i32 12
  
  for.body:                                         ; preds = %for.body, %for.body.preheader
    %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
    %idxprom = zext i32 %i.08 to i64
    %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
    %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
    %0 = bitcast %struct.S* %arrayidx2 to i8*
    %1 = bitcast %struct.S* %arrayidx to i8*
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false)
    %inc = add nuw nsw i32 %i.08, 1
    %cmp = icmp slt i32 %inc, %n
    br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
  }
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
  
  attributes #0 = { argmemonly nofree nosync nounwind willreturn }

The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic instead of a load/store pair, so the pass does not recognize the copy.

With this change, re-running `opt -loop-idiom -S < memcpy_before.ll` hoists the loop memcpy into the loop preheader. For this trivial case, the remaining loop is dead and will be removed by another pass.

  %struct.S = type { i32, i32, i8 }
  
  define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr {
  entry:
    %a1 = bitcast %struct.S* %a to i8*
    %b2 = bitcast %struct.S* %b to i8*
    %cmp7 = icmp sgt i32 %n, 0
    br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
  
  for.body.preheader:                               ; preds = %entry
    %0 = zext i32 %n to i64
    %1 = mul nuw nsw i64 %0, 12
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false)
    br label %for.body
  
  for.cond.cleanup.loopexit:                        ; preds = %for.body
    br label %for.cond.cleanup
  
  for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
    ret i32 12
  
  for.body:                                         ; preds = %for.body, %for.body.preheader
    %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
    %idxprom = zext i32 %i.08 to i64
    %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
    %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
    %2 = bitcast %struct.S* %arrayidx2 to i8*
    %3 = bitcast %struct.S* %arrayidx to i8*
    %inc = add nuw nsw i32 %i.08, 1
    %cmp = icmp slt i32 %inc, %n
    br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
  }
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
  
  attributes #0 = { argmemonly nofree nosync nounwind willreturn }


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97667

Files:
  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
  llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll

-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97667.327046.patch
Type: text/x-patch
Size: 29340 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210301/44591096/attachment.bin>


More information about the llvm-commits mailing list