<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - [Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy"

   href="https://bugs.llvm.org/show_bug.cgi?id=47300">47300</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>Other

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Loop Optimizer

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>anhtuyen@ca.ibm.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Created <span class=""><a href="attachment.cgi?id=23888" name="attach_23888" title="The IR file of work.c when LIRP is not run.">attachment 23888</a> <a href="attachment.cgi?id=23888&amp;action=edit" title="The IR file of work.c when LIRP is not run.">[details]</a></span>

The IR file of work.c when LIRP is not run.

If the trip count is "small", LIRP should not replace legal stores with

memset/memcpy. The following testcase can be used to demonstrate how Loop Idiom

Recognize Pass (LIRP) degrades the performance. 

1) Compile work.c to an IR file called work.ll. 

2) Call opt with -loop-idiom to produce an IR file containing a memset

instruction

   opt -basic-aa -loop-idiom -S work.ll -o work.yes.ll

3) Inspect to make sure LIRP did replace store with memset in work.yes.ll, but

not in work.ll

4) Compile test.c, and link it with the IR from (1) and then the IR from

(2). 

clang++ -c test.c

clang++ test.o work.ll -o no

clang++ test.o work.yes.ll -o yes

5) Run both the executables on a quiet machine. On my performance machine,

times spent are:

With memset: **Time elapsed: 1.4215**

Without memset: **Time elapsed: 1.3611**

test.c

```

#include &lt;stdio.h&gt;

#include &lt;time.h&gt;

int work(int A[], int sizeI, int sizeL);

int main() {

  int A[3] = {1, 2, 3};

  int res = 1;

  clock_t begin = clock();

  res = work(A, 9999, 3);

  clock_t end = clock();

  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

  printf("Time elapsed: %4.4f\n", time_spent);

  return res;

}

```

work.c

```

int work(int A[], int sizeI, int arraySize) {

    for (int i = 0; i &lt; sizeI; ++i)

      for (int j = 0; j &lt; sizeI; ++j)

        for (int k = 0; k &lt; arraySize; ++k)

          A[k] = 0;

  return A[arraySize - 1];

}

```

IR before calling opt with **-loop-idiom**

```

; ModuleID = './work.ll'

source_filename = "work.c"

target datalayout = "e-m:e-i64:64-n32:64"

target triple = "powerpc64le-unknown-linux-gnu"

; Function Attrs: noinline nounwind

define dso_local signext i32 @_Z4workPiii(i32* %A, i32 signext %sizeI, i32

signext %arraySize)  #0 {

entry:

  %cmp6 = icmp slt i32 0, %sizeI

  br i1 %cmp6, label %for.body.preheader, label %for.end12

for.body.preheader:                               ; preds = %entry

  br label %for.body

for.body:                                         ; preds =

%for.body.preheader, %for.inc10

  %i.07 = phi i32 [ %inc11, %for.inc10 ], [ 0, %for.body.preheader ]

  %cmp23 = icmp slt i32 0, %sizeI

  br i1 %cmp23, label %for.body3.preheader, label %for.inc10

for.body3.preheader:                              ; preds = %for.body

  br label %for.body3

for.body3:                                        ; preds =

%for.body3.preheader, %for.inc7

  %j.04 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.body3.preheader ]

  %cmp51 = icmp slt i32 0, %arraySize

  br i1 %cmp51, label %for.body6.preheader, label %for.inc7

for.body6.preheader:                              ; preds = %for.body3

  br label %for.body6

for.body6:                                        ; preds =

%for.body6.preheader, %for.body6

  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]

  %idxprom = sext i32 %k.02 to i64

  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom

  store i32 0, i32* %arrayidx, align 4

  %inc = add nsw i32 %k.02, 1

  %cmp5 = icmp slt i32 %inc, %arraySize

  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit

for.inc7.loopexit:                                ; preds = %for.body6

  br label %for.inc7

for.inc7:                                         ; preds = %for.inc7.loopexit,

%for.body3

  %inc8 = add nsw i32 %j.04, 1

  %cmp2 = icmp slt i32 %inc8, %sizeI

  br i1 %cmp2, label %for.body3, label %for.inc10.loopexit

for.inc10.loopexit:                               ; preds = %for.inc7

  br label %for.inc10

for.inc10:                                        ; preds =

%for.inc10.loopexit, %for.body

  %inc11 = add nsw i32 %i.07, 1

  %cmp = icmp slt i32 %inc11, %sizeI

  br i1 %cmp, label %for.body, label %for.end12.loopexit

for.end12.loopexit:                               ; preds = %for.inc10

  br label %for.end12

for.end12:                                        ; preds =

%for.end12.loopexit, %entry

  %sub = sub nsw i32 %arraySize, 1

  %idxprom13 = sext i32 %sub to i64

  %arrayidx14 = getelementptr inbounds i32, i32* %A, i64 %idxprom13

  %0 = load i32, i32* %arrayidx14, align 4

  ret i32 %0

}

attributes #0 = { noinline nounwind

"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"

"frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0"

"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"

"no-signed-zeros-fp-math"="false" "no-trapping-math"="true"

"stack-protector-buffer-size"="8" "target-cpu"="pwr9"

"target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-spe"

"unsafe-fp-math"="false" "use-soft-float"="false"

 }

```

Extract from IR **with memset** 

```

for.body6.preheader:                              ; preds = %for.body3

  call void @llvm.memset.p0i8.i64(i8* align 4 %A1, i8 0, i64 %1, i1 false)

  br label %for.body6

for.body6:                                        ; preds = %for.body6,

%for.body6.preheader

  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]

  %idxprom = sext i32 %k.02 to i64

  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom

  %inc = add nsw i32 %k.02, 1

  %cmp5 = icmp slt i32 %inc, %arraySize

  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit

```

Extract from IR **without memset**

```

for.body6.preheader:                              ; preds = %for.body3

  br label %for.body6

for.body6:                                        ; preds = %for.body6,

%for.body6.preheader

  %k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]

  %idxprom = sext i32 %k.02 to i64

  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom

  store i32 0, i32* %arrayidx, align 4

  %inc = add nsw i32 %k.02, 1

  %cmp5 = icmp slt i32 %inc, %arraySize

  br i1 %cmp5, label %for.body6, label %for.inc7.loopexit

```</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>