[llvm-dev] Is LLVM 6.0's LoopUnroll pass not working?

Mon Oct 23 21:19:51 PDT 2017

Hi LLVM developers,

$ cat hello.c
#include <stdio.h>
int main(int argc, char *argv[]) {
   for (int i = 0; i < 10; i++) {
     printf("%d\n", i);
   }
   return 0;
}

$ /opt/llvm-svn/bin/clang --version
Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 6.0.0svn)
Target: x86_64-redhat-linux
Thread model: posix
InstalledDir: /opt/llvm-svn/bin

$ /opt/llvm-svn/bin/clang -S -emit-llvm hello.c -o hello3.ll

$ cat hello3.ll
; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-redhat-linux"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define i32 @main(i32, i8**) #0 {
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i8**, align 8
   %6 = alloca i32, align 4
   store i32 0, i32* %3, align 4
   store i32 %0, i32* %4, align 4
   store i8** %1, i8*** %5, align 8
   store i32 0, i32* %6, align 4
   br label %7

; <label>:7:                                      ; preds = %13, %2
   %8 = load i32, i32* %6, align 4
   %9 = icmp slt i32 %8, 10
   br i1 %9, label %10, label %16

; <label>:10:                                     ; preds = %7
   %11 = load i32, i32* %6, align 4
   %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11)
   br label %13

; <label>:13:                                     ; preds = %10
   %14 = load i32, i32* %6, align 4
   %15 = add nsw i32 %14, 1
   store i32 %15, i32* %6, align 4
   br label %7

; <label>:16:                                     ; preds = %7
   ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind optnone uwtable 
"correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" 
"use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 
6.0.0svn)"}

$ /opt/llvm-svn//bin/opt -S -mem2reg -loops -loop-simplify -loop-rotate 
-lcssa -loop-unroll -sccp -simplifycfg hello3.ll -o hello3.loop.ll

$ cat hello3.loop.ll
; ModuleID = 'hello3.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-redhat-linux"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define i32 @main(i32, i8**) #0 {
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i8**, align 8
   %6 = alloca i32, align 4
   store i32 0, i32* %3, align 4
   store i32 %0, i32* %4, align 4
   store i8** %1, i8*** %5, align 8
   store i32 0, i32* %6, align 4
   br label %7

; <label>:7:                                      ; preds = %13, %2
   %8 = load i32, i32* %6, align 4
   %9 = icmp slt i32 %8, 10
   br i1 %9, label %10, label %16

; <label>:10:                                     ; preds = %7
   %11 = load i32, i32* %6, align 4
   %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11)
   br label %13

; <label>:13:                                     ; preds = %10
   %14 = load i32, i32* %6, align 4
   %15 = add nsw i32 %14, 1
   store i32 %15, i32* %6, align 4
   br label %7

; <label>:16:                                     ; preds = %7
   ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind optnone uwtable 
"correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" 
"use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 
6.0.0svn)"}

The output is identical to the input (apart from the ModuleID). Did LLVM 6.0's LoopUnroll pass fail to unroll the loop?

But with LLVM 3.9.1 the same pass pipeline works:

$ clang --version
clang version 3.9.1 (tags/RELEASE_391/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/bin

$ clang -S -emit-llvm hello.c -o hello1.ll

$ cat hello1.ll
; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main(i32, i8**) #0 {
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i8**, align 8
   %6 = alloca i32, align 4
   store i32 0, i32* %3, align 4
   store i32 %0, i32* %4, align 4
   store i8** %1, i8*** %5, align 8
   store i32 0, i32* %6, align 4
   br label %7

; <label>:7:                                      ; preds = %13, %2
   %8 = load i32, i32* %6, align 4
   %9 = icmp slt i32 %8, 10
   br i1 %9, label %10, label %16

; <label>:10:                                     ; preds = %7
   %11 = load i32, i32* %6, align 4
   %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11)
   br label %13

; <label>:13:                                     ; preds = %10
   %14 = load i32, i32* %6, align 4
   %15 = add nsw i32 %14, 1
   store i32 %15, i32* %6, align 4
   br label %7

; <label>:16:                                     ; preds = %7
   ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { nounwind uwtable "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" 
"use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.9.1 (tags/RELEASE_391/final)"}

$ opt -S -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll 
-sccp -simplifycfg hello1.ll -o hello1.loop.ll

$ cat hello1.loop.ll
; ModuleID = 'hello1.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main(i32, i8**) #0 {
   %3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 0)
   %4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 1)
   %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 2)
   %6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 3)
   %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 4)
   %8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 5)
   %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 6)
   %10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 7)
   %11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 8)
   %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x 
i8], [4 x i8]* @.str, i32 0, i32 0), i32 9)
   ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { nounwind uwtable "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" 
"use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.9.1 (tags/RELEASE_391/final)"}

And LLVM 3.1 also still works:

$ ./build/Release+Asserts/bin/opt --version
LLVM (http://llvm.org/):
   LLVM version 3.1
   Optimized build with assertions.
   Built Oct 23 2017 (16:22:51).
   Default target: x86_64-unknown-linux-gnu
   Host CPU: corei7-avx

$ ./build/Release+Asserts/bin/clang -S -emit-llvm hello.c -o hello.ll

$ cat hello.ll

; ModuleID = 'hello.c'
target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
   %retval = alloca i32, align 4
   %argc.addr = alloca i32, align 4
   %argv.addr = alloca i8**, align 8
   %bits = alloca [4 x i32], align 16
   %iter = alloca i32, align 4
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
   store i8** %argv, i8*** %argv.addr, align 8
   store i32 0, i32* %iter, align 4
   br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
   %0 = load i32* %iter, align 4
   %cmp = icmp slt i32 %0, 10
   br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
   %1 = load i32* %iter, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x 
i8]* @.str, i32 0, i32 0), i32 %1)
   br label %for.inc

for.inc:                                          ; preds = %for.body
   %2 = load i32* %iter, align 4
   %inc = add nsw i32 %2, 1
   store i32 %inc, i32* %iter, align 4
   br label %for.cond

for.end:                                          ; preds = %for.cond
   ret i32 0
}

declare i32 @printf(i8*, ...)

$ ./build/Release+Asserts/bin/opt -S -mem2reg -loops -loop-simplify 
-loop-rotate -lcssa -loop-unroll -sccp -simplifycfg hello.ll -o 
hello.loop.ll

$ cat hello.loop.ll

; ModuleID = 'hello.ll'
target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x 
i8]* @.str, i32 0, i32 0), i32 0)
   %call.1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 1)
   %call.2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 2)
   %call.3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 3)
   %call.4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 4)
   %call.5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 5)
   %call.6 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 6)
   %call.7 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 7)
   %call.8 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 8)
   %call.9 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 
x i8]* @.str, i32 0, i32 0), i32 9)
   ret i32 0
}

declare i32 @printf(i8*, ...)

The loop was successfully unrolled (this is an optimized build with assertions).

Maybe I am using the LoopUnroll pass incorrectly? Please give me some hints.
Thanks a lot!

-- 
Regards,
Leslie Zhai - https://reviews.llvm.org/p/xiangzhai/