[PATCH] D33946: [InlineCost] Find identical loads in the callee

Haicheng Wu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 28 07:08:22 PDT 2017


haicheng added a comment.

In https://reviews.llvm.org/D33946#791658, @mehdi_amini wrote:

> The description says "The motivation example is a fully unrolled 3x3 matrix multiplication. It loads every data in matrix a and b three times because of the Stores between them", but from reading the code I don't see how this case is handled since it seems that you bail as soon as you see a store (or a call)? What did I miss? Is the description still up-to-date?


Hi Mehdi,

Sorry for the confusion.  The matrix multiplication was the original test case, but I replaced it with some simpler ones when I updated the patch.

Here is the IR of the matrix multiplication example.  Every input element is loaded three times (e.g. through %arrayidx8).  The addresses of the Stores in the callee are all based on an alloca in the caller, so the inline cost model can figure out that these Stores can be eliminated if the callee is inlined (see visitStore()).  Once the Stores are gone, the repeated Loads that they clobber are safe to eliminate too.  Does that make sense?

If the callee has Stores that cannot be eliminated, I just bail out for now.

  ; A matrix is a struct wrapping a single 3x3 array of doubles, so element
  ; m[i][j] is addressed with GEP indices (i64 0, i32 0, i64 i, i64 j).
  %struct.matrix = type { [3 x [3 x double]] }
  
  ; Caller side of the example: reserves a local %struct.matrix on the stack
  ; and hands it to @matrix_multiply as the third (result) argument, forwarding
  ; %a and %b unchanged.
  define void @outer(%struct.matrix* %a, %struct.matrix* %b) {
  entry:
    %result = alloca %struct.matrix
    call void @matrix_multiply(%struct.matrix* %a, %struct.matrix* %b, %struct.matrix* %result)
    ret void
  }
  
  ; Every element of matrices a and b is loaded three times.
  ; typedef struct { double m[3][3]; } matrix;
  ; void matrix_multiply( matrix *a, matrix *b, matrix *c ){
  ;   int i,j,k;
  ;     for(i=0; i<3; i++)
  ;       for(j=0; j<3; j++)
  ;          for(k=0; k<3; k++)
  ;            c->m[i][j] += a->m[i][k] * b->m[k][j];
  ; }
  
  ; Fully unrolled 3x3 matrix multiply-accumulate:
  ;   c->m[i][j] += a->m[i][k] * b->m[k][j]   for i, j, k in 0..2
  ;
  ; %a and %b are only read; every store goes through a pointer derived from
  ; %c.  None of the parameters carries a noalias attribute.
  ;
  ; The code is organised as nine groups, one per element c[i][j].  Within a
  ; group, c[i][j] is loaded once (at k = 0); the running sum is then kept in
  ; SSA values and stored back to c[i][j] after each of the three k steps.
  ;
  ; GEPs are emitted once, at first use, and reused afterwards, so repeated
  ; loads use the same pointer value.  Each element of a and b is loaded
  ; three times (e.g. %arrayidx8 feeds %0, %7 and %14), with stores to c in
  ; between the repeated loads.
  define void @matrix_multiply(%struct.matrix* %a, %struct.matrix* %b, %struct.matrix* %c) {
  entry:
    ; ---- c[0][0] += a[0][k] * b[k][0] ----
    %arrayidx8 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 0, i64 0
    %arrayidx18 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 0, i64 0
    %0 = load double, double* %arrayidx8
    %arrayidx13 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 0, i64 0
    %1 = load double, double* %arrayidx13
    %mul = fmul double %0, %1
    ; The only load of c[0][0]; later k steps accumulate onto %add in SSA.
    %2 = load double, double* %arrayidx18
    %add = fadd double %2, %mul
    store double %add, double* %arrayidx18
    %arrayidx8.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 0, i64 1
    %3 = load double, double* %arrayidx8.1
    %arrayidx13.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 1, i64 0
    %4 = load double, double* %arrayidx13.1
    %mul.1 = fmul double %3, %4
    %add.1 = fadd double %add, %mul.1
    store double %add.1, double* %arrayidx18
    %arrayidx8.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 0, i64 2
    %5 = load double, double* %arrayidx8.2
    %arrayidx13.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 2, i64 0
    %6 = load double, double* %arrayidx13.2
    %mul.2 = fmul double %5, %6
    %add.2 = fadd double %add.1, %mul.2
    store double %add.2, double* %arrayidx18
    ; ---- c[0][1] += a[0][k] * b[k][1] ----
    ; Row 0 of a is re-loaded through the GEPs created for c[0][0].
    %arrayidx18.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 0, i64 1
    %7 = load double, double* %arrayidx8
    %arrayidx13.140 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 0, i64 1
    %8 = load double, double* %arrayidx13.140
    %mul.141 = fmul double %7, %8
    %9 = load double, double* %arrayidx18.1
    %add.142 = fadd double %9, %mul.141
    store double %add.142, double* %arrayidx18.1
    %10 = load double, double* %arrayidx8.1
    %arrayidx13.1.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 1, i64 1
    %11 = load double, double* %arrayidx13.1.1
    %mul.1.1 = fmul double %10, %11
    %add.1.1 = fadd double %add.142, %mul.1.1
    store double %add.1.1, double* %arrayidx18.1
    %12 = load double, double* %arrayidx8.2
    %arrayidx13.2.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 2, i64 1
    %13 = load double, double* %arrayidx13.2.1
    %mul.2.1 = fmul double %12, %13
    %add.2.1 = fadd double %add.1.1, %mul.2.1
    store double %add.2.1, double* %arrayidx18.1
    ; ---- c[0][2] += a[0][k] * b[k][2] ----
    ; Third load of each element of row 0 of a.
    %arrayidx18.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 0, i64 2
    %14 = load double, double* %arrayidx8
    %arrayidx13.243 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 0, i64 2
    %15 = load double, double* %arrayidx13.243
    %mul.244 = fmul double %14, %15
    %16 = load double, double* %arrayidx18.2
    %add.245 = fadd double %16, %mul.244
    store double %add.245, double* %arrayidx18.2
    %17 = load double, double* %arrayidx8.1
    %arrayidx13.1.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 1, i64 2
    %18 = load double, double* %arrayidx13.1.2
    %mul.1.2 = fmul double %17, %18
    %add.1.2 = fadd double %add.245, %mul.1.2
    store double %add.1.2, double* %arrayidx18.2
    %19 = load double, double* %arrayidx8.2
    %arrayidx13.2.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %b, i64 0, i32 0, i64 2, i64 2
    %20 = load double, double* %arrayidx13.2.2
    %mul.2.2 = fmul double %19, %20
    %add.2.2 = fadd double %add.1.2, %mul.2.2
    store double %add.2.2, double* %arrayidx18.2
    ; ---- c[1][0] += a[1][k] * b[k][0] ----
    ; From here on every b element already has a GEP, so column 0 of b is
    ; re-loaded through %arrayidx13, %arrayidx13.1 and %arrayidx13.2.
    %arrayidx8.146 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 1, i64 0
    %arrayidx18.147 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 1, i64 0
    %21 = load double, double* %arrayidx8.146
    %22 = load double, double* %arrayidx13
    %mul.149 = fmul double %21, %22
    %23 = load double, double* %arrayidx18.147
    %add.150 = fadd double %23, %mul.149
    store double %add.150, double* %arrayidx18.147
    %arrayidx8.1.151 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 1, i64 1
    %24 = load double, double* %arrayidx8.1.151
    %25 = load double, double* %arrayidx13.1
    %mul.1.153 = fmul double %24, %25
    %add.1.154 = fadd double %add.150, %mul.1.153
    store double %add.1.154, double* %arrayidx18.147
    %arrayidx8.2.155 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 1, i64 2
    %26 = load double, double* %arrayidx8.2.155
    %27 = load double, double* %arrayidx13.2
    %mul.2.157 = fmul double %26, %27
    %add.2.158 = fadd double %add.1.154, %mul.2.157
    store double %add.2.158, double* %arrayidx18.147
    ; ---- c[1][1] += a[1][k] * b[k][1] ----
    %arrayidx18.1.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 1, i64 1
    %28 = load double, double* %arrayidx8.146
    %29 = load double, double* %arrayidx13.140
    %mul.141.1 = fmul double %28, %29
    %30 = load double, double* %arrayidx18.1.1
    %add.142.1 = fadd double %30, %mul.141.1
    store double %add.142.1, double* %arrayidx18.1.1
    %31 = load double, double* %arrayidx8.1.151
    %32 = load double, double* %arrayidx13.1.1
    %mul.1.1.1 = fmul double %31, %32
    %add.1.1.1 = fadd double %add.142.1, %mul.1.1.1
    store double %add.1.1.1, double* %arrayidx18.1.1
    %33 = load double, double* %arrayidx8.2.155
    %34 = load double, double* %arrayidx13.2.1
    %mul.2.1.1 = fmul double %33, %34
    %add.2.1.1 = fadd double %add.1.1.1, %mul.2.1.1
    store double %add.2.1.1, double* %arrayidx18.1.1
    ; ---- c[1][2] += a[1][k] * b[k][2] ----
    %arrayidx18.2.1 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 1, i64 2
    %35 = load double, double* %arrayidx8.146
    %36 = load double, double* %arrayidx13.243
    %mul.244.1 = fmul double %35, %36
    %37 = load double, double* %arrayidx18.2.1
    %add.245.1 = fadd double %37, %mul.244.1
    store double %add.245.1, double* %arrayidx18.2.1
    %38 = load double, double* %arrayidx8.1.151
    %39 = load double, double* %arrayidx13.1.2
    %mul.1.2.1 = fmul double %38, %39
    %add.1.2.1 = fadd double %add.245.1, %mul.1.2.1
    store double %add.1.2.1, double* %arrayidx18.2.1
    %40 = load double, double* %arrayidx8.2.155
    %41 = load double, double* %arrayidx13.2.2
    %mul.2.2.1 = fmul double %40, %41
    %add.2.2.1 = fadd double %add.1.2.1, %mul.2.2.1
    store double %add.2.2.1, double* %arrayidx18.2.1
    ; ---- c[2][0] += a[2][k] * b[k][0] ----
    ; Third load of each element of column 0 of b.
    %arrayidx8.260 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 2, i64 0
    %arrayidx18.261 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 2, i64 0
    %42 = load double, double* %arrayidx8.260
    %43 = load double, double* %arrayidx13
    %mul.263 = fmul double %42, %43
    %44 = load double, double* %arrayidx18.261
    %add.264 = fadd double %44, %mul.263
    store double %add.264, double* %arrayidx18.261
    %arrayidx8.1.265 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 2, i64 1
    %45 = load double, double* %arrayidx8.1.265
    %46 = load double, double* %arrayidx13.1
    %mul.1.267 = fmul double %45, %46
    %add.1.268 = fadd double %add.264, %mul.1.267
    store double %add.1.268, double* %arrayidx18.261
    %arrayidx8.2.269 = getelementptr inbounds %struct.matrix, %struct.matrix* %a, i64 0, i32 0, i64 2, i64 2
    %47 = load double, double* %arrayidx8.2.269
    %48 = load double, double* %arrayidx13.2
    %mul.2.271 = fmul double %47, %48
    %add.2.272 = fadd double %add.1.268, %mul.2.271
    store double %add.2.272, double* %arrayidx18.261
    ; ---- c[2][1] += a[2][k] * b[k][1] ----
    %arrayidx18.1.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 2, i64 1
    %49 = load double, double* %arrayidx8.260
    %50 = load double, double* %arrayidx13.140
    %mul.141.2 = fmul double %49, %50
    %51 = load double, double* %arrayidx18.1.2
    %add.142.2 = fadd double %51, %mul.141.2
    store double %add.142.2, double* %arrayidx18.1.2
    %52 = load double, double* %arrayidx8.1.265
    %53 = load double, double* %arrayidx13.1.1
    %mul.1.1.2 = fmul double %52, %53
    %add.1.1.2 = fadd double %add.142.2, %mul.1.1.2
    store double %add.1.1.2, double* %arrayidx18.1.2
    %54 = load double, double* %arrayidx8.2.269
    %55 = load double, double* %arrayidx13.2.1
    %mul.2.1.2 = fmul double %54, %55
    %add.2.1.2 = fadd double %add.1.1.2, %mul.2.1.2
    store double %add.2.1.2, double* %arrayidx18.1.2
    ; ---- c[2][2] += a[2][k] * b[k][2] ----
    %arrayidx18.2.2 = getelementptr inbounds %struct.matrix, %struct.matrix* %c, i64 0, i32 0, i64 2, i64 2
    %56 = load double, double* %arrayidx8.260
    %57 = load double, double* %arrayidx13.243
    %mul.244.2 = fmul double %56, %57
    %58 = load double, double* %arrayidx18.2.2
    %add.245.2 = fadd double %58, %mul.244.2
    store double %add.245.2, double* %arrayidx18.2.2
    %59 = load double, double* %arrayidx8.1.265
    %60 = load double, double* %arrayidx13.1.2
    %mul.1.2.2 = fmul double %59, %60
    %add.1.2.2 = fadd double %add.245.2, %mul.1.2.2
    store double %add.1.2.2, double* %arrayidx18.2.2
    %61 = load double, double* %arrayidx8.2.269
    %62 = load double, double* %arrayidx13.2.2
    %mul.2.2.2 = fmul double %61, %62
    %add.2.2.2 = fadd double %add.1.2.2, %mul.2.2.2
    store double %add.2.2.2, double* %arrayidx18.2.2
    ret void
  }

Haicheng



================
Comment at: lib/Analysis/InlineCost.cpp:265
   unsigned SROACostSavingsLost;
+  unsigned CSELoadCostSavings;
 
----------------
mehdi_amini wrote:
> chandlerc wrote:
> > Similarly, I would call this `LoadCSECostSavings`.
> Why do we have both `LoadEliminationCostSavings` and `LoadEliminationCost` ?
`LoadEliminationCost` is used in the cost calculation, and `LoadEliminationCostSavings` is used for stats printing.  This mirrors the existing pair `SROAArgCosts` and `SROACostSavings`, respectively.


Repository:
  rL LLVM

https://reviews.llvm.org/D33946





More information about the llvm-commits mailing list