[llvm-commits] [PATCH] Multidimensional Array Index Delinearization Analysis

Thu Sep 27 02:05:54 PDT 2012

On 09/26/2012 07:22 PM, Hal Finkel wrote:
> On Wed, 26 Sep 2012 16:52:48 +0200
> Tobias Grosser <tobias at grosser.es> wrote:
>
>> On 09/26/2012 04:34 PM, Hal Finkel wrote:
>>> On Wed, 26 Sep 2012 10:10:28 +0200
>>> Tobias Grosser <tobias at grosser.es> wrote:
>>>
>>>> On 09/26/2012 12:02 AM, Hal Finkel wrote:
>>>>> I've attached an updated version of the pass. This version works
>>>>> better (several bugs have been fixed), and also includes a working
>>>>> bounds analysis.
>>>>
>>>> Hi Hal,
>>>>
>>>> I tested this one again and found it still has problems.
>>>> himeno_2.ll gives the expected output, himeno_1.ll does not.
>>>
>>> You need to run LICM first in order for case 1 to work correctly. So
>>> running opt -analyze -licm -delinearize and things are fine.
>>
>> Would it be difficult to avoid this? The original test case had
>> branches/loops that blocked LICM. (If needed, I could provide a test
>> case where -licm does not perform a transformation).
>
> I've attached an updated patch. Using SE's loop-invariance check (as a
> backup to LoopInfo's) was sufficient to fix the problem in this case.
> Please see if it helps in your other tests as well.

I attached another test case. This works after -O3, but only because
TBAA can prove that the loads can be hoisted out of the loop nest.
Without TBAA this is difficult. There is probably nothing you need to
change due to this test case, but it illustrates a difficulty I have
seen a couple of times.

Also, running LICM to prove those loads are loop invariant may be 
problematic. It tends to introduce a lot of additional dependences, 
which complicate future optimizations. Hence, we run Polly before LICM.

And a last point: How hard would it be to provide a SCEV based interface?

In Polly, we normally scan the code for load and store instructions,
get the SCEV from the pointer of the load/store and subtract the base
pointer to get a SCEV representing the subscript. It would be awesome
if we could give that subscript SCEV to the delinearizer, which would
then provide us with the delinearized subscripts.

Cheers
Tobi

-------------- next part --------------
; Module target: little-endian ("e") with 64-bit pointers ("p:64:64:64"),
; for x86-64 Linux.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; #define MR(mt,n,r,c,d)  mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)]
; 
; struct Mat {
;   float* m;
;   int mnums;
;   int mrows;
;   int mcols;
;   int mdeps;
; };
; 
; typedef struct Mat Matrix;
; 
; void jacobi(int nn, Matrix* a, Matrix* p)
; {
;   long i, j, k, p_rows_sub, p_cols_sub, p_deps_sub;
; 
;   p_rows_sub = p->mrows - 1;
;   p_cols_sub = p->mcols - 1;
;   p_deps_sub = p->mdeps - 1;
; 
;     for(i = 1; i < p_rows_sub; i++)
;       for(j = 1; j < p_cols_sub; j++)
;         for(k = 1; k < p_deps_sub; k++)
;           MR(a,0,i,j,k) = 1.0f;
; }
; 
; Access function:
; 
; {
;   {
;     {
;       (4
;        + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64)))
;        + %a.base
;       ),
;       +,
;       (4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))
;     }<%for.i>,
;     +,
;     (4 * (sext i32 %a.deps to i64))
;   }<%for.j>,
;   +,
;   4
; }<%for.k>

; IR layout of the C `struct Mat` sketched in the comment above:
;   field 0 = m (float*), 1 = mnums, 2 = mrows, 3 = mcols, 4 = mdeps (all i32).
%struct.Mat = type { float*, i32, i32, i32, i32 }

; @jacobi - stores the constant 1.0 into elements of the flat float array
; referenced by %a, addressed through a hand-linearized 3-D subscript.
;
; Parameters:
;   %nn - not used anywhere in the body.
;   %a  - matrix whose base pointer (field 0) and fields 3 and 4 are read;
;         the stores go through the base pointer.
;   %p  - matrix whose fields 2, 3 and 4 (each minus one) give the loop
;         bounds.
;
; Loop structure: for.i > for.j > for.k, each induction variable starting
; at 1 and stepping by 1. Every loop is bottom-tested with an equality
; compare against its bound and has no entry guard, so each body executes
; at least once.
define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
entry:
  ; Bound for the i loop: (field 2 of %p) - 1, sign-extended to i64.
  %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2
  %p.rows = load i32* %p.rows.ptr
  %p.rows.sub = add i32 %p.rows, -1
  %p.rows.sext = sext i32 %p.rows.sub to i64
  ; Bound for the j loop: (field 3 of %p) - 1, sign-extended to i64.
  %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3
  %p.cols = load i32* %p.cols.ptr
  %p.cols.sub = add i32 %p.cols, -1
  %p.cols.sext = sext i32 %p.cols.sub to i64
  ; Bound for the k loop: (field 4 of %p) - 1, sign-extended to i64.
  %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4
  %p.deps = load i32* %p.deps.ptr
  %p.deps.sub = add i32 %p.deps, -1
  %p.deps.sext = sext i32 %p.deps.sub to i64
  ; Fields 3 and 4 of %a are the two i32 factors used in the subscript;
  ; they are loaded here, before the loop nest, and only sign-extended
  ; later inside for.j.
  %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3
  %a.cols = load i32* %a.cols.ptr
  %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4
  %a.deps = load i32* %a.deps.ptr
  ; Base pointer of the float array (field 0 of %a).
  %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0
  %a.base = load float** %a.base.ptr, align 8
  br label %for.i

for.i:                                            ; preds = %for.i.inc, %entry
  ; Outermost induction variable, initial value 1.
  %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ]
  br label %for.j

for.j:                                            ; preds = %for.j.inc, %for.i
  ; Middle induction variable, initial value 1.
  %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ]
  ; These sign-extensions sit inside the i and j loops even though their
  ; operands are defined in the entry block.
  %a.cols.sext = sext i32 %a.cols to i64
  %a.deps.sext = sext i32 %a.deps to i64
  br label %for.k

for.k:                                            ; preds = %for.k, %for.j
  ; Innermost induction variable, initial value 1; this block is both the
  ; loop header and the latch.
  %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ]
  ; Linearized subscript: %tmp4 = (%a.cols.sext * %i + %j) * %a.deps.sext + %k
  %tmp1 = mul nsw i64 %a.cols.sext, %i
  %tmp2 = add i64 %tmp1, %j
  %tmp3 = mul i64 %tmp2, %a.deps.sext
  %tmp4 = add nsw i64 %k, %tmp3
  ; Address of element %tmp4 of the float array, then store 1.0 there.
  %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4
  store float 1.000000e+00, float* %arrayidx
  ; Leave the k loop once the incremented %k equals %p.deps.sext.
  %k.inc = add nsw i64 %k, 1
  %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext
  br i1 %k.exitcond, label %for.j.inc, label %for.k

for.j.inc:                                        ; preds = %for.k
  ; j-loop latch: leave once the incremented %j equals %p.cols.sext.
  %j.inc = add nsw i64 %j, 1
  %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext
  br i1 %j.exitcond, label %for.i.inc, label %for.j

for.i.inc:                                        ; preds = %for.j.inc
  ; i-loop latch: leave once the incremented %i equals %p.rows.sext.
  %i.inc = add nsw i64 %i, 1
  %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext
  br i1 %i.exitcond, label %end, label %for.i

end:                                              ; preds = %for.i.inc
  ret void
}