[llvm-bugs] [Bug 47300] New: [Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy

Mon Aug 24 16:10:43 PDT 2020

https://bugs.llvm.org/show_bug.cgi?id=47300

            Bug ID: 47300
           Summary: [Loop Idiom Recognize] Degrade performance when arrays
                    are replaced by memset/memcpy
           Product: libraries
           Version: trunk
          Hardware: Other
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: anhtuyen at ca.ibm.com
                CC: llvm-bugs at lists.llvm.org

Created attachment 23888
  --> https://bugs.llvm.org/attachment.cgi?id=23888&action=edit
The IR file of work.c when LIRP is not run.

If the trip count is "small", LIRP should not replace legal stores with
memset/memcpy. The following testcase demonstrates how the Loop Idiom
Recognize Pass (LIRP) degrades performance.

1) Compile work.c to an IR file called work.ll.

2) Call opt with -loop-idiom to produce an IR file containing a memset
instruction:
   opt -basic-aa -loop-idiom -S work.ll -o work.yes.ll

3) Inspect the output to confirm that LIRP replaced the store with a memset in
work.yes.ll, but not in work.ll.

4) Compile test.c, and link it first with the IR from (1) and then with the IR
from (2).

clang++ -c test.c
clang++ test.o work.ll -o no
clang++ test.o work.yes.ll -o yes

5) Run both executables on a quiet machine. On my performance machine, the
times spent are:
With memset: **Time elapsed: 1.4215**
Without memset: **Time elapsed: 1.3611**

test.c

```
#include <stdio.h>
#include <time.h>

int work(int A[], int sizeI, int sizeL);

/*
 * Benchmark driver: times one call to work() on a 3-element array and
 * prints the elapsed processor time in seconds.  The value returned by
 * work() becomes the process exit status.
 */
int main() {
  int buffer[3] = {1, 2, 3};

  /* Bracket only the call under test with the two clock() samples. */
  const clock_t t0 = clock();
  int status = work(buffer, 9999, 3);
  const clock_t t1 = clock();

  printf("Time elapsed: %4.4f\n", (double)(t1 - t0) / CLOCKS_PER_SEC);
  return status;
}
```

work.c
```
/*
 * Store 0 into A[0 .. arraySize-1], repeating that inner store loop
 * sizeI * sizeI times, then return A[arraySize - 1].
 *
 *   A         - array that is written at indices 0 .. arraySize-1
 *   sizeI     - trip count of each of the two outer loops
 *   arraySize - trip count of the innermost store loop
 *
 * NOTE(review): if arraySize <= 0 the final read uses a negative index;
 * presumably callers always pass arraySize >= 1 - confirm.
 */
int work(int A[], int sizeI, int arraySize) {

    for (int i = 0; i < sizeI; ++i)
      for (int j = 0; j < sizeI; ++j)
        /* Innermost loop: the zero-store loop this report is about. */
        for (int k = 0; k < arraySize; ++k)
          A[k] = 0;

  return A[arraySize - 1];
}
```

IR before calling opt with **-loop-idiom**
```
; ModuleID = './work.ll'
source_filename = "work.c"
; Data layout: little-endian ("e"), ELF name mangling ("m:e"), i64 aligned to
; 64 bits, native integer widths of 32 and 64 bits ("n32:64").
target datalayout = "e-m:e-i64:64-n32:64"
; Target: 64-bit little-endian PowerPC, Linux, GNU environment.
target triple = "powerpc64le-unknown-linux-gnu"

; Function Attrs: noinline nounwind
;
; @_Z4workPiii is the Itanium-mangled name of work(int*, int, int).
;   %A         - base pointer of the i32 array that is zeroed
;   %sizeI     - trip count of the two outer loops (%i.07 and %j.04)
;   %arraySize - trip count of the innermost store loop (%k.02)
; Returns the i32 loaded from %A[%arraySize - 1].
;
; Each loop is guarded by an "icmp slt i32 0, <bound>" test in the block that
; precedes its preheader, so a loop body only runs when its bound is positive.
define dso_local signext i32 @_Z4workPiii(i32* %A, i32 signext %sizeI, i32 signext %arraySize) #0 {
entry:
  ; Guard for the outer (i) loop.
  %cmp6 = icmp slt i32 0, %sizeI
  br i1 %cmp6, label %for.body.preheader, label %for.end12

for.body.preheader:                               ; preds = %entry
  br label %for.body

; Outer loop header: i = %i.07.
for.body:                                         ; preds = %for.body.preheader, %for.inc10
  %i.07 = phi i32 [ %inc11, %for.inc10 ], [ 0, %for.body.preheader ]
  ; Guard for the middle (j) loop.
  %cmp23 = icmp slt i32 0, %sizeI
  br i1 %cmp23, label %for.body3.preheader, label %for.inc10

for.body3.preheader:                              ; preds = %for.body
  br label %for.body3

; Middle loop header: j = %j.04.
for.body3:                                        ; preds = %for.body3.preheader, %for.inc7
  %j.04 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.body3.preheader ]
  ; Guard for the innermost (k) loop.
  %cmp51 = icmp slt i32 0, %arraySize
  br i1 %cmp51, label %for.body6.preheader, label %for.inc7

for.body6.preheader:                              ; preds = %for.body3
  br label %for.body6

; Innermost loop: stores 0 to A[k] for k = 0 .. arraySize-1.  This is the
; store loop that -loop-idiom rewrites as a memset call in the preheader.
for.body6:                                        ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %idxprom = sext i32 %k.02 to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  store i32 0, i32* %arrayidx, align 4
  %inc = add nsw i32 %k.02, 1
  %cmp5 = icmp slt i32 %inc, %arraySize
  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit

for.inc7.loopexit:                                ; preds = %for.body6
  br label %for.inc7

; Middle loop latch: ++j.
for.inc7:                                         ; preds = %for.inc7.loopexit, %for.body3
  %inc8 = add nsw i32 %j.04, 1
  %cmp2 = icmp slt i32 %inc8, %sizeI
  br i1 %cmp2, label %for.body3, label %for.inc10.loopexit

for.inc10.loopexit:                               ; preds = %for.inc7
  br label %for.inc10

; Outer loop latch: ++i.
for.inc10:                                        ; preds = %for.inc10.loopexit, %for.body
  %inc11 = add nsw i32 %i.07, 1
  %cmp = icmp slt i32 %inc11, %sizeI
  br i1 %cmp, label %for.body, label %for.end12.loopexit

for.end12.loopexit:                               ; preds = %for.inc10
  br label %for.end12

; Function exit: load and return A[arraySize - 1].
for.end12:                                        ; preds = %for.end12.loopexit, %entry
  %sub = sub nsw i32 %arraySize, 1
  %idxprom13 = sext i32 %sub to i64
  %arrayidx14 = getelementptr inbounds i32, i32* %A, i64 %idxprom13
  %0 = load i32, i32* %arrayidx14, align 4
  ret i32 %0
}

; Attribute group #0, referenced by the definition of @_Z4workPiii.
attributes #0 = { noinline nounwind
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
"frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0"
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false" "no-trapping-math"="true"
"stack-protector-buffer-size"="8" "target-cpu"="pwr9"
"target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-spe"
"unsafe-fp-math"="false" "use-soft-float"="false"
 }
```

Extract from IR **with memset** 
```
; Extract only: %A1 and %1 are defined outside the lines shown here
; (presumably an i8* view of %A and the byte count - confirm in work.yes.ll).
for.body6.preheader:                              ; preds = %for.body3
  call void @llvm.memset.p0i8.i64(i8* align 4 %A1, i8 0, i64 %1, i1 false)
  br label %for.body6

; The store is gone from the loop body, but the counting loop itself is still
; present after the memset call.
for.body6:                                        ; preds = %for.body6, %for.body6.preheader
  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %idxprom = sext i32 %k.02 to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  %inc = add nsw i32 %k.02, 1
  %cmp5 = icmp slt i32 %inc, %arraySize
  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```

Extract from IR **without memset**

```
for.body6.preheader:                              ; preds = %for.body3
  br label %for.body6

; Innermost loop as written in the source: one i32 zero store per iteration.
for.body6:                                        ; preds = %for.body6, %for.body6.preheader
  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %idxprom = sext i32 %k.02 to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  store i32 0, i32* %arrayidx, align 4
  %inc = add nsw i32 %k.02, 1
  %cmp5 = icmp slt i32 %inc, %arraySize
  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200824/de55d43f/attachment.html>