[PATCH] D97667: [loop-idiom] Hoist loop memcpys to loop preheader

Thu Apr 29 01:23:43 PDT 2021

tpopp added a comment.

  ; ModuleID = '__compute_module' 
  source_filename = "__compute_module"                                                                                               
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"                                       
  target triple = "x86_64-grtev4-linux-gnu"                                                                                          

  ; 96-byte source buffer for the memcpy calls below. Read as little-endian
  ; 32-bit floats, the bytes encode the 24 values 1.0, 2.0, ..., 24.0.
  ; The literal must stay on a single line: a line break inside c"..." becomes
  ; part of the string and the length would no longer match [96 x i8].
  @0 = private unnamed_addr constant [96 x i8] c"\00\00\80?\00\00\00@\00\00@@\00\00\80@\00\00\A0@\00\00\C0@\00\00\E0@\00\00\00A\00\00\10A\00\00 A\00\000A\00\00@A\00\00PA\00\00`A\00\00pA\00\00\80A\00\00\88A\00\00\90A\00\00\98A\00\00\A0A\00\00\A8A\00\00\B0A\00\00\B8A\00\00\C0A", align 16

  ; Fills the [4 x [3 x [2 x [1 x float]]]] array whose address is stored in the
  ; first slot of %buffer_table with the contents of @0, copying 8 bytes at a
  ; time. For outer index i (0..3) and inner index j (0..2) the 8 bytes at
  ; destination element [i][j] (byte offset 24*i + 8*j) are taken from @0 at
  ; byte offset 88 - 24*i - 8*j, i.e. the 8-byte chunks are written in reverse
  ; order along the two outermost dimensions.
  ; %retval, %run_options, %params and %prof_counters are not read (readnone).
  ; Function Attrs: nofree norecurse nosync nounwind uwtable
  define void @Reverse4DFloatArrayOnDim01.3(i8* nocapture readnone %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
  entry:
    ; Load the destination array pointer from %buffer_table[0].
    %0 = bitcast i8** %buffer_table to [4 x [3 x [2 x [1 x float]]]]**
    %1 = load [4 x [3 x [2 x [1 x float]]]]*, [4 x [3 x [2 x [1 x float]]]]** %0, align 8, !invariant.load !0, !dereferenceable !1, !align !2
    br label %reverse.2.loop_header.dim.1.preheader

  ; Outer loop header: induction variable i = %reverse.2.invar_address.dim.0.06.
  reverse.2.loop_header.dim.1.preheader:            ; preds = %entry, %reverse.2.loop_exit.dim.1
    %reverse.2.invar_address.dim.0.06 = phi i64 [ 0, %entry ], [ %invar.inc, %reverse.2.loop_exit.dim.1 ]
    ; %3 = 88 - 24*i: source byte offset of the last 8-byte chunk for this i.
    %2 = mul nsw i64 %reverse.2.invar_address.dim.0.06, -24
    %3 = add i64 %2, 88
    br label %reverse.2.loop_header.dim.2.preheader

  ; Inner loop header: induction variable j = %reverse.2.invar_address.dim.1.05.
  reverse.2.loop_header.dim.2.preheader:            ; preds = %reverse.2.loop_header.dim.1.preheader, %reverse.2.loop_exit.dim.2
    %reverse.2.invar_address.dim.1.05 = phi i64 [ 0, %reverse.2.loop_header.dim.1.preheader ], [ %invar.inc1, %reverse.2.loop_exit.dim.2 ]
    ; Destination: address of element [i][j][0][0] of the loaded array.
    %scevgep = getelementptr [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 %reverse.2.invar_address.dim.0.06, i64 %reverse.2.invar_address.dim.1.05, i64 0, i64 0
    %scevgep7 = bitcast float* %scevgep to i8*
    ; Source: @0 + (88 - 24*i - 8*j).
    %4 = mul nsw i64 %reverse.2.invar_address.dim.1.05, -8
    %5 = add i64 %3, %4
    %scevgep8 = getelementptr [96 x i8], [96 x i8]* @0, i64 0, i64 %5
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %scevgep7, i8* align 4 %scevgep8, i64 8, i1 false)
    br label %reverse.2.loop_exit.dim.2

  ; Inner loop latch: leaves the loop once j > 1, so j takes the values 0, 1, 2.
  reverse.2.loop_exit.dim.2:                        ; preds = %reverse.2.loop_header.dim.2.preheader
    %invar.inc1 = add nuw nsw i64 %reverse.2.invar_address.dim.1.05, 1
    %6 = icmp ugt i64 %reverse.2.invar_address.dim.1.05, 1
    br i1 %6, label %reverse.2.loop_exit.dim.1, label %reverse.2.loop_header.dim.2.preheader

  ; Outer loop latch: leaves the loop once i > 2, so i takes the values 0..3.
  reverse.2.loop_exit.dim.1:                        ; preds = %reverse.2.loop_exit.dim.2
    %invar.inc = add nuw nsw i64 %reverse.2.invar_address.dim.0.06, 1
    %7 = icmp ugt i64 %reverse.2.invar_address.dim.0.06, 2
    br i1 %7, label %reverse.2.loop_exit.dim.0, label %reverse.2.loop_header.dim.1.preheader

  reverse.2.loop_exit.dim.0:                        ; preds = %reverse.2.loop_exit.dim.1
    ret void
  }

  ; Declaration of the memcpy intrinsic called from @Reverse4DFloatArrayOnDim01.3.
  ; Operands in order: i8* destination (writeonly), i8* source (readonly),
  ; i64 byte count, and an i1 immediate flag (the volatile flag per the LLVM
  ; Language Reference).
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i64 %2, i1 immarg %3
  ) #1

  ; Attribute group #0 is attached to @Reverse4DFloatArrayOnDim01.3 and group #1
  ; to the llvm.memcpy declaration.
  attributes #0 = { nofree norecurse nosync nounwind uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
  attributes #1 = { argmemonly nofree nosync nounwind willreturn }

  ; Metadata nodes referenced by the pointer load in the entry block:
  ;   !0 is the (empty) !invariant.load node,
  ;   !1 is the !dereferenceable byte count (96),
  ;   !2 is the !align value (16).
  !0 = !{}
  !1 = !{i64 96}
  !2 = !{i64 16}

`opt -loop-idiom  <%s -S`

This is the IR at the first point where running this code generates different IR before and after. It then diverges further on a subsequent execution (where the before and after runs now have different inputs). I am trying to find a way to share a full `opt` command rather than sharing different snippets for the before/after inputs. I hope this first IR helps, though.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97667/new/

https://reviews.llvm.org/D97667