[llvm-bugs] [Bug 26642] New: Miscompilation caused by stack adjustment code clobbering used registers
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Feb 16 13:27:44 PST 2016
https://llvm.org/bugs/show_bug.cgi?id=26642
Bug ID: 26642
Summary: Miscompilation caused by stack adjustment code
clobbering used registers
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: normal
Priority: P
Component: Backend: AArch64
Assignee: unassignedbugs at nondot.org
Reporter: andrew.b.adams at gmail.com
CC: llvm-bugs at lists.llvm.org
Classification: Unclassified
Some time on Friday one of the Halide tests started returning wrong values on
the arm64 buildbot:
http://buildbot.halide-lang.org:8010/builders/arm64-linux-64-trunk/builds/69
(Halide is a language built on LLVM. Our buildbots pull and test against trunk
llvm every four hours or so.)
The cause seems to be stack adjustment code that clobbers an in-use register.
Below is .ll from llvm 3.7, and the asm it produces, and .ll from trunk llvm,
and the asm it produces. Pay attention to x9. It's the address of one of the
outputs (the one that's coming out as wrong).
In the working code, x9 is first used as a temporary to adjust the stack
downwards (sub x9, sp, #832), is then loaded with the second buffer's host
pointer (ldp x11, x9, [x1]), and is finally used as the address to write the
output to near the end of the function (str w11, [x9]).
In the broken code, x9 is loaded from the argument first (ldp x11, x9, [x1]),
*then* used as a temporary to adjust the stack downward (sub x9, sp, #832),
which clobbers the pointer that was just loaded, and then used as the address
by the same store instruction (str w11, [x9]). That store now becomes a useless
write to the stack instead of actually writing the output value.
Good ll and asm:
; ModuleID = 'halide_module_f10'
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabihf"
%struct.halide_filter_argument_t = type { i8*, i32, i32, i32, i32,
%struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*,
%struct.halide_scalar_value_t* }
%struct.halide_scalar_value_t = type { %union.anon }
%union.anon = type { double }
%struct.halide_filter_metadata_t = type { i32, i32,
%struct.halide_filter_argument_t*, i8*, i8* }
%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8,
i8, [2 x i8] }
@str = private constant [6 x i8] c"f10.0\00", align 32
@str.2 = private constant [6 x i8] c"f10.1\00", align 32
@0 = private constant [2 x %struct.halide_filter_argument_t]
[%struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x
i8]* @str, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32,
%struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null,
%struct.halide_scalar_value_t* null }, %struct.halide_filter_argument_t { i8*
getelementptr inbounds ([6 x i8], [6 x i8]* @str.2, i32 0, i32 0), i32 2, i32
0, i32 0, i32 32, %struct.halide_scalar_value_t* null,
%struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }]
@str.3 = private constant [35 x i8] c"arm-64-linux-no_asserts-no_runtime\00",
align 32
@str.4 = private constant [4 x i8] c"f10\00", align 32
@f10_metadata = constant %struct.halide_filter_metadata_t { i32 0, i32 2,
%struct.halide_filter_argument_t* getelementptr inbounds ([2 x
%struct.halide_filter_argument_t], [2 x %struct.halide_filter_argument_t]* @0,
i32 0, i32 0), i8* getelementptr inbounds ([35 x i8], [35 x i8]* @str.3, i32 0,
i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str.4, i32 0, i32 0) }
; Function Attrs: nounwind
define i32 @__f10(%struct.buffer_t* noalias nocapture %f10.0.buffer,
%struct.buffer_t* noalias nocapture %f10.1.buffer) #0 {
entry:
%f8.0.host59 = alloca [13 x <8 x i32>], align 32
%f8.1.host60 = alloca [13 x <8 x i32>], align 32
%buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 1
%f10.0.host = load i8*, i8** %buf_host, align 8
%buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 0
%f10.0.dev = load i64, i64* %buf_dev, align 8
%0 = icmp eq i64 %f10.0.dev, 0
%1 = icmp eq i8* %f10.0.host, null
%f10.0.host_and_dev_are_null = and i1 %1, %0
%buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 1
%f10.1.host = load i8*, i8** %buf_host10, align 8
%buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 0
%f10.1.dev = load i64, i64* %buf_dev11, align 8
%2 = icmp eq i64 %f10.1.dev, 0
%3 = icmp eq i8* %f10.1.host, null
%f10.1.host_and_dev_are_null = and i1 %3, %2
br i1 %f10.0.host_and_dev_are_null, label %true_bb, label %after_bb
true_bb: ; preds = %entry
%buf_elem_size27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 5
store i32 4, i32* %buf_elem_size27, align 4
%buf_extent29 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 2, i64 0
%4 = bitcast i32* %buf_extent29 to i8*
call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 48, i32 4, i1 false)
br label %after_bb
after_bb: ; preds = %entry, %true_bb
br i1 %f10.1.host_and_dev_are_null, label %after_bb42.thread, label
%after_bb42
after_bb42.thread: ; preds = %after_bb
%buf_elem_size43 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 5
store i32 4, i32* %buf_elem_size43, align 4
%buf_extent45 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 2, i64 0
%5 = bitcast i32* %buf_extent45 to i8*
call void @llvm.memset.p0i8.i64(i8* %5, i8 0, i64 48, i32 4, i1 false)
br label %destructor_block
after_bb42: ; preds = %after_bb
br i1 %f10.0.host_and_dev_are_null, label %destructor_block, label %"for
f8.s0.v0"
"for f8.s0.v0": ; preds = %after_bb42, %"for
f8.s0.v0"
%indvars.iv61 = phi i64 [ %9, %"for f8.s0.v0" ], [ -1, %after_bb42 ]
%f8.s0.v0 = phi i32 [ %14, %"for f8.s0.v0" ], [ -1, %after_bb42 ]
%6 = sub nsw i32 100, %f8.s0.v0
%7 = sext i32 %6 to i64
%8 = mul nsw i64 %7, %indvars.iv61
%9 = add nsw i64 %indvars.iv61, 1
%10 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 %9
%11 = trunc i64 %8 to i32
store i32 %11, i32* %10, align 4, !tbaa !4
%12 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 %9
%13 = trunc i64 %indvars.iv61 to i32
store i32 %13, i32* %12, align 4, !tbaa !6
%14 = add nsw i32 %f8.s0.v0, 1
%exitcond63 = icmp eq i64 %9, 100
br i1 %exitcond63, label %"for f8.s1.r30.x$r.preheader", label %"for
f8.s0.v0"
"for f8.s1.r30.x$r.preheader": ; preds = %"for f8.s0.v0"
%15 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 0
%16 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 0
%t24.pre = load i32, i32* %15, align 32, !tbaa !7
%.pre = load i32, i32* %16, align 32, !tbaa !18
br label %"for f8.s1.r30.x$r"
"for f8.s1.r30.x$r": ; preds = %"for
f8.s1.r30.x$r.preheader", %"for f8.s1.r30.x$r"
%17 = phi i32 [ %.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.1.value, %"for
f8.s1.r30.x$r" ]
%t24 = phi i32 [ %t24.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.0.value,
%"for f8.s1.r30.x$r" ]
%indvars.iv = phi i64 [ 0, %"for f8.s1.r30.x$r.preheader" ], [ %18, %"for
f8.s1.r30.x$r" ]
%18 = add nuw nsw i64 %indvars.iv, 1
%19 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 %18
%t25 = load i32, i32* %19, align 4, !tbaa !4
%20 = icmp slt i32 %t24, %t25
%f8.0.value = select i1 %20, i32 %t25, i32 %t24
%21 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 %18
%22 = load i32, i32* %21, align 4, !tbaa !6
%f8.1.value = select i1 %20, i32 %22, i32 %17
store i32 %f8.0.value, i32* %15, align 32, !tbaa !7
store i32 %f8.1.value, i32* %16, align 32, !tbaa !18
%exitcond = icmp eq i64 %18, 100
br i1 %exitcond, label %"consume f8", label %"for f8.s1.r30.x$r"
"consume f8": ; preds = %"for
f8.s1.r30.x$r"
%23 = bitcast i8* %f10.0.host to i32*
store i32 %f8.0.value, i32* %23, align 4, !tbaa !29
%24 = bitcast i8* %f10.1.host to i32*
store i32 %f8.1.value, i32* %24, align 4, !tbaa !41
br label %destructor_block
destructor_block: ; preds = %after_bb42.thread,
%"consume f8", %after_bb42
ret i32 0
}
; Function Attrs: nounwind
define i32 @f10(%struct.buffer_t* noalias nocapture %f10.0.buffer,
%struct.buffer_t* noalias nocapture %f10.1.buffer) #0 {
entry:
%__f10_result = tail call i32 @__f10(%struct.buffer_t* %f10.0.buffer,
%struct.buffer_t* %f10.1.buffer) #0
ret i32 0
}
; Function Attrs: nounwind
define i32 @f10_argv(i8** nocapture readonly) #0 {
entry:
%1 = bitcast i8** %0 to %struct.buffer_t**
%2 = load %struct.buffer_t*, %struct.buffer_t** %1, align 8
%3 = getelementptr i8*, i8** %0, i64 1
%4 = bitcast i8** %3 to %struct.buffer_t**
%5 = load %struct.buffer_t*, %struct.buffer_t** %4, align 8
%6 = tail call i32 @f10(%struct.buffer_t* %2, %struct.buffer_t* %5)
ret i32 0
}
; Function Attrs: nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
attributes #0 = { nounwind }
!llvm.ident = !{!0, !0}
!llvm.module.flags = !{!1, !2, !3}
!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final 251413)"}
!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
!2 = !{i32 2, !"halide_mcpu", !"generic"}
!3 = !{i32 2, !"halide_mattrs", !""}
!4 = !{!"f8.0", !5}
!5 = !{!"Halide buffer"}
!6 = !{!"f8.1", !5}
!7 = !{!"f8.0.width1.base0", !8}
!8 = !{!"f8.0.width2.base0", !9}
!9 = !{!"f8.0.width4.base0", !10}
!10 = !{!"f8.0.width8.base0", !11}
!11 = !{!"f8.0.width16.base0", !12}
!12 = !{!"f8.0.width32.base0", !13}
!13 = !{!"f8.0.width64.base0", !14}
!14 = !{!"f8.0.width128.base0", !15}
!15 = !{!"f8.0.width256.base0", !16}
!16 = !{!"f8.0.width512.base0", !17}
!17 = !{!"f8.0.width1024.base0", !4}
!18 = !{!"f8.1.width1.base0", !19}
!19 = !{!"f8.1.width2.base0", !20}
!20 = !{!"f8.1.width4.base0", !21}
!21 = !{!"f8.1.width8.base0", !22}
!22 = !{!"f8.1.width16.base0", !23}
!23 = !{!"f8.1.width32.base0", !24}
!24 = !{!"f8.1.width64.base0", !25}
!25 = !{!"f8.1.width128.base0", !26}
!26 = !{!"f8.1.width256.base0", !27}
!27 = !{!"f8.1.width512.base0", !28}
!28 = !{!"f8.1.width1024.base0", !6}
!29 = !{!"f10.0.width1.base0", !30}
!30 = !{!"f10.0.width2.base0", !31}
!31 = !{!"f10.0.width4.base0", !32}
!32 = !{!"f10.0.width8.base0", !33}
!33 = !{!"f10.0.width16.base0", !34}
!34 = !{!"f10.0.width32.base0", !35}
!35 = !{!"f10.0.width64.base0", !36}
!36 = !{!"f10.0.width128.base0", !37}
!37 = !{!"f10.0.width256.base0", !38}
!38 = !{!"f10.0.width512.base0", !39}
!39 = !{!"f10.0.width1024.base0", !40}
!40 = !{!"f10.0", !5}
!41 = !{!"f10.1.width1.base0", !42}
!42 = !{!"f10.1.width2.base0", !43}
!43 = !{!"f10.1.width4.base0", !44}
!44 = !{!"f10.1.width8.base0", !45}
!45 = !{!"f10.1.width16.base0", !46}
!46 = !{!"f10.1.width32.base0", !47}
!47 = !{!"f10.1.width64.base0", !48}
!48 = !{!"f10.1.width128.base0", !49}
!49 = !{!"f10.1.width256.base0", !50}
!50 = !{!"f10.1.width512.base0", !51}
!51 = !{!"f10.1.width1024.base0", !52}
!52 = !{!"f10.1", !5}
.text
.file "halide_module_f10"
.section .text.__f10,"ax", at progbits
.globl __f10
.align 2
.type __f10, at function
__f10: // @__f10
// BB#0: // %entry
stp x28, x27, [sp, #-32]!
stp x29, x30, [sp, #16]
add x29, sp, #16 // =16
sub x9, sp, #832 // =832
and sp, x9, #0xffffffffffffffe0
ldp x10, x8, [x0]
ldp x11, x9, [x1]
orr x12, x8, x10
orr x10, x9, x11
cmp x12, #0 // =0
cset w11, eq
cmp x10, #0 // =0
cset w10, eq
cbnz x12, .LBB0_2
// BB#1: // %true_bb
orr w12, wzr, #0x4
stp xzr, xzr, [x0, #48]
stp xzr, xzr, [x0, #32]
stp xzr, xzr, [x0, #16]
str w12, [x0, #64]
.LBB0_2: // %after_bb
cbz w10, .LBB0_4
// BB#3: // %after_bb42.thread
orr w8, wzr, #0x4
stp xzr, xzr, [x1, #48]
stp xzr, xzr, [x1, #32]
stp xzr, xzr, [x1, #16]
str w8, [x1, #64]
b .LBB0_10
.LBB0_4: // %after_bb42
movz w10, #0x65
tbnz w11, #0, .LBB0_10
// BB#5:
movn w11, #0
movn x12, #0
mov x13, sp
add x14, sp, #416 // =416
.LBB0_6: // %for f8.s0.v0
// =>This Inner Loop Header: Depth=1
mul w15, w10, w12
add x12, x12, #1 // =1
str w11, [x13], #4
add w11, w11, #1 // =1
str w15, [x14], #4
sub x10, x10, #1 // =1
cbnz x10, .LBB0_6
// BB#7: // %for f8.s1.r30.x$r.preheader
ldr w10, [sp, #416]
ldr w11, [sp]
mov x12, sp
orr x12, x12, #0x4
add x13, sp, #416 // =416
orr x13, x13, #0x4
movz w14, #0x64
.LBB0_8: // %for f8.s1.r30.x$r
// =>This Inner Loop Header: Depth=1
ldr w15, [x13], #4
ldr w16, [x12], #4
cmp w10, w15
csel w10, w15, w10, lt
csel w11, w16, w11, lt
str w10, [sp, #416]
str w11, [sp]
sub x14, x14, #1 // =1
cbnz x14, .LBB0_8
// BB#9: // %consume f8
str w10, [x8]
str w11, [x9]
.LBB0_10: // %destructor_block
mov w0, wzr
sub sp, x29, #16 // =16
ldp x29, x30, [sp, #16]
ldp x28, x27, [sp], #32
ret
.Lfunc_end0:
.size __f10, .Lfunc_end0-__f10
.section .text.f10,"ax", at progbits
.globl f10
.align 2
.type f10, at function
f10: // @f10
// BB#0: // %entry
stp x29, x30, [sp, #-16]!
mov x29, sp
bl __f10
mov w0, wzr
ldp x29, x30, [sp], #16
ret
.Lfunc_end1:
.size f10, .Lfunc_end1-f10
.section .text.f10_argv,"ax", at progbits
.globl f10_argv
.align 2
.type f10_argv, at function
f10_argv: // @f10_argv
// BB#0: // %entry
stp x29, x30, [sp, #-16]!
mov x29, sp
ldp x8, x1, [x0]
mov x0, x8
bl f10
mov w0, wzr
ldp x29, x30, [sp], #16
ret
.Lfunc_end2:
.size f10_argv, .Lfunc_end2-f10_argv
.type .Lstr, at object // @str
.section .rodata,"a", at progbits
.align 5
.Lstr:
.asciz "f10.0"
.size .Lstr, 6
.type .Lstr.2, at object // @str.2
.align 5
.Lstr.2:
.asciz "f10.1"
.size .Lstr.2, 6
.type .L__unnamed_1, at object // @0
.section .data.rel.ro.local,"aw", at progbits
.align 4
.L__unnamed_1:
.xword .Lstr
.word 2 // 0x2
.word 0 // 0x0
.word 0 // 0x0
.word 32 // 0x20
.xword 0
.xword 0
.xword 0
.xword .Lstr.2
.word 2 // 0x2
.word 0 // 0x0
.word 0 // 0x0
.word 32 // 0x20
.xword 0
.xword 0
.xword 0
.size .L__unnamed_1, 96
.type .Lstr.3, at object // @str.3
.section .rodata,"a", at progbits
.align 5
.Lstr.3:
.asciz "arm-64-linux-no_asserts-no_runtime"
.size .Lstr.3, 35
.type .Lstr.4, at object // @str.4
.align 5
.Lstr.4:
.asciz "f10"
.size .Lstr.4, 4
.type f10_metadata, at object // @f10_metadata
.section .data.rel.ro.local,"aw", at progbits
.globl f10_metadata
.align 4
f10_metadata:
.word 0 // 0x0
.word 2 // 0x2
.xword .L__unnamed_1
.xword .Lstr.3
.xword .Lstr.4
.size f10_metadata, 32
.ident "clang version 3.7.0 (tags/RELEASE_370/final 251413)"
.ident "clang version 3.7.0 (tags/RELEASE_370/final 251413)"
.section ".note.GNU-stack","", at progbits
Bad ll and asm:
; ModuleID = 'halide_module_f10'
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabihf"
%struct.halide_filter_argument_t = type { i8*, i32, i32, i32, i32,
%struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*,
%struct.halide_scalar_value_t* }
%struct.halide_scalar_value_t = type { %union.anon }
%union.anon = type { double }
%struct.halide_filter_metadata_t = type { i32, i32,
%struct.halide_filter_argument_t*, i8*, i8* }
%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8,
i8, [2 x i8] }
@str = private constant [6 x i8] c"f10.0\00", align 32
@str.2 = private constant [6 x i8] c"f10.1\00", align 32
@0 = private constant [2 x %struct.halide_filter_argument_t]
[%struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x
i8]* @str, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32,
%struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null,
%struct.halide_scalar_value_t* null }, %struct.halide_filter_argument_t { i8*
getelementptr inbounds ([6 x i8], [6 x i8]* @str.2, i32 0, i32 0), i32 2, i32
0, i32 0, i32 32, %struct.halide_scalar_value_t* null,
%struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }]
@str.3 = private constant [35 x i8] c"arm-64-linux-no_asserts-no_runtime\00",
align 32
@str.4 = private constant [4 x i8] c"f10\00", align 32
@f10_metadata = constant %struct.halide_filter_metadata_t { i32 0, i32 2,
%struct.halide_filter_argument_t* getelementptr inbounds ([2 x
%struct.halide_filter_argument_t], [2 x %struct.halide_filter_argument_t]* @0,
i32 0, i32 0), i8* getelementptr inbounds ([35 x i8], [35 x i8]* @str.3, i32 0,
i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str.4, i32 0, i32 0) }
; Function Attrs: norecurse nounwind
define i32 @__f10(%struct.buffer_t* noalias nocapture %f10.0.buffer,
%struct.buffer_t* noalias nocapture %f10.1.buffer) #0 {
entry:
%f8.0.host59 = alloca [13 x <8 x i32>], align 32
%f8.1.host60 = alloca [13 x <8 x i32>], align 32
%buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 1
%f10.0.host = load i8*, i8** %buf_host, align 8
%buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 0
%f10.0.dev = load i64, i64* %buf_dev, align 8
%0 = icmp eq i64 %f10.0.dev, 0
%1 = icmp eq i8* %f10.0.host, null
%f10.0.host_and_dev_are_null = and i1 %1, %0
%buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 1
%f10.1.host = load i8*, i8** %buf_host10, align 8
%buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 0
%f10.1.dev = load i64, i64* %buf_dev11, align 8
%2 = icmp eq i64 %f10.1.dev, 0
%3 = icmp eq i8* %f10.1.host, null
%f10.1.host_and_dev_are_null = and i1 %3, %2
br i1 %f10.0.host_and_dev_are_null, label %true_bb, label %after_bb
true_bb: ; preds = %entry
%buf_elem_size27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 5
store i32 4, i32* %buf_elem_size27, align 4
%buf_extent29 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.0.buffer, i64 0, i32 2, i64 0
%4 = bitcast i32* %buf_extent29 to i8*
call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 48, i32 4, i1 false)
br label %after_bb
after_bb: ; preds = %entry, %true_bb
br i1 %f10.1.host_and_dev_are_null, label %after_bb42.thread, label
%after_bb42
after_bb42.thread: ; preds = %after_bb
%buf_elem_size43 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 5
store i32 4, i32* %buf_elem_size43, align 4
%buf_extent45 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t*
%f10.1.buffer, i64 0, i32 2, i64 0
%5 = bitcast i32* %buf_extent45 to i8*
call void @llvm.memset.p0i8.i64(i8* %5, i8 0, i64 48, i32 4, i1 false)
br label %destructor_block
after_bb42: ; preds = %after_bb
br i1 %f10.0.host_and_dev_are_null, label %destructor_block, label %"for
f8.s0.v0"
"for f8.s0.v0": ; preds = %after_bb42, %"for
f8.s0.v0"
%indvars.iv61 = phi i64 [ %9, %"for f8.s0.v0" ], [ -1, %after_bb42 ]
%f8.s0.v0 = phi i32 [ %14, %"for f8.s0.v0" ], [ -1, %after_bb42 ]
%6 = sub nsw i32 100, %f8.s0.v0
%7 = sext i32 %6 to i64
%8 = mul nsw i64 %7, %indvars.iv61
%9 = add nsw i64 %indvars.iv61, 1
%10 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 %9
%11 = trunc i64 %8 to i32
store i32 %11, i32* %10, align 4, !tbaa !4
%12 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 %9
%13 = trunc i64 %indvars.iv61 to i32
store i32 %13, i32* %12, align 4, !tbaa !6
%14 = add nsw i32 %f8.s0.v0, 1
%15 = icmp eq i64 %9, 100
br i1 %15, label %"for f8.s1.r30.x$r.preheader", label %"for f8.s0.v0"
"for f8.s1.r30.x$r.preheader": ; preds = %"for f8.s0.v0"
%16 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 0
%17 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 0
%t24.pre = load i32, i32* %16, align 32, !tbaa !7
%.pre = load i32, i32* %17, align 32, !tbaa !18
br label %"for f8.s1.r30.x$r"
"for f8.s1.r30.x$r": ; preds = %"for
f8.s1.r30.x$r.preheader", %"for f8.s1.r30.x$r"
%18 = phi i32 [ %.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.1.value, %"for
f8.s1.r30.x$r" ]
%t24 = phi i32 [ %t24.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.0.value,
%"for f8.s1.r30.x$r" ]
%indvars.iv = phi i64 [ 0, %"for f8.s1.r30.x$r.preheader" ], [ %19, %"for
f8.s1.r30.x$r" ]
%19 = add nuw nsw i64 %indvars.iv, 1
%20 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.0.host59, i64 0, i64 0, i64 %19
%t25 = load i32, i32* %20, align 4, !tbaa !4
%21 = icmp slt i32 %t24, %t25
%f8.0.value = select i1 %21, i32 %t25, i32 %t24
%22 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]*
%f8.1.host60, i64 0, i64 0, i64 %19
%23 = load i32, i32* %22, align 4, !tbaa !6
%f8.1.value = select i1 %21, i32 %23, i32 %18
store i32 %f8.0.value, i32* %16, align 32, !tbaa !7
store i32 %f8.1.value, i32* %17, align 32, !tbaa !18
%24 = icmp eq i64 %19, 100
br i1 %24, label %"consume f8", label %"for f8.s1.r30.x$r"
"consume f8": ; preds = %"for
f8.s1.r30.x$r"
%25 = bitcast i8* %f10.0.host to i32*
store i32 %f8.0.value, i32* %25, align 4, !tbaa !29
%26 = bitcast i8* %f10.1.host to i32*
store i32 %f8.1.value, i32* %26, align 4, !tbaa !41
br label %destructor_block
destructor_block: ; preds = %after_bb42.thread,
%"consume f8", %after_bb42
ret i32 0
}
; Function Attrs: norecurse nounwind
define i32 @f10(%struct.buffer_t* noalias nocapture %f10.0.buffer,
%struct.buffer_t* noalias nocapture %f10.1.buffer) #0 {
entry:
%__f10_result = tail call i32 @__f10(%struct.buffer_t* %f10.0.buffer,
%struct.buffer_t* %f10.1.buffer) #2
ret i32 0
}
; Function Attrs: norecurse nounwind
define i32 @f10_argv(i8** nocapture readonly) #0 {
entry:
%1 = bitcast i8** %0 to %struct.buffer_t**
%2 = load %struct.buffer_t*, %struct.buffer_t** %1, align 8
%3 = getelementptr i8*, i8** %0, i64 1
%4 = bitcast i8** %3 to %struct.buffer_t**
%5 = load %struct.buffer_t*, %struct.buffer_t** %4, align 8
%6 = tail call i32 @f10(%struct.buffer_t* %2, %struct.buffer_t* %5)
ret i32 0
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
attributes #0 = { norecurse nounwind }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }
!llvm.ident = !{!0, !0}
!llvm.module.flags = !{!1, !2, !3}
!0 = !{!"clang version 3.9.0 (trunk 260979)"}
!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
!2 = !{i32 2, !"halide_mcpu", !"generic"}
!3 = !{i32 2, !"halide_mattrs", !""}
!4 = !{!"f8.0", !5}
!5 = !{!"Halide buffer"}
!6 = !{!"f8.1", !5}
!7 = !{!"f8.0.width1.base0", !8}
!8 = !{!"f8.0.width2.base0", !9}
!9 = !{!"f8.0.width4.base0", !10}
!10 = !{!"f8.0.width8.base0", !11}
!11 = !{!"f8.0.width16.base0", !12}
!12 = !{!"f8.0.width32.base0", !13}
!13 = !{!"f8.0.width64.base0", !14}
!14 = !{!"f8.0.width128.base0", !15}
!15 = !{!"f8.0.width256.base0", !16}
!16 = !{!"f8.0.width512.base0", !17}
!17 = !{!"f8.0.width1024.base0", !4}
!18 = !{!"f8.1.width1.base0", !19}
!19 = !{!"f8.1.width2.base0", !20}
!20 = !{!"f8.1.width4.base0", !21}
!21 = !{!"f8.1.width8.base0", !22}
!22 = !{!"f8.1.width16.base0", !23}
!23 = !{!"f8.1.width32.base0", !24}
!24 = !{!"f8.1.width64.base0", !25}
!25 = !{!"f8.1.width128.base0", !26}
!26 = !{!"f8.1.width256.base0", !27}
!27 = !{!"f8.1.width512.base0", !28}
!28 = !{!"f8.1.width1024.base0", !6}
!29 = !{!"f10.0.width1.base0", !30}
!30 = !{!"f10.0.width2.base0", !31}
!31 = !{!"f10.0.width4.base0", !32}
!32 = !{!"f10.0.width8.base0", !33}
!33 = !{!"f10.0.width16.base0", !34}
!34 = !{!"f10.0.width32.base0", !35}
!35 = !{!"f10.0.width64.base0", !36}
!36 = !{!"f10.0.width128.base0", !37}
!37 = !{!"f10.0.width256.base0", !38}
!38 = !{!"f10.0.width512.base0", !39}
!39 = !{!"f10.0.width1024.base0", !40}
!40 = !{!"f10.0", !5}
!41 = !{!"f10.1.width1.base0", !42}
!42 = !{!"f10.1.width2.base0", !43}
!43 = !{!"f10.1.width4.base0", !44}
!44 = !{!"f10.1.width8.base0", !45}
!45 = !{!"f10.1.width16.base0", !46}
!46 = !{!"f10.1.width32.base0", !47}
!47 = !{!"f10.1.width64.base0", !48}
!48 = !{!"f10.1.width128.base0", !49}
!49 = !{!"f10.1.width256.base0", !50}
!50 = !{!"f10.1.width512.base0", !51}
!51 = !{!"f10.1.width1024.base0", !52}
!52 = !{!"f10.1", !5}
.text
.file "halide_module_f10"
.section .text.__f10,"ax", at progbits
.globl __f10
.p2align 2
.type __f10, at function
__f10: // @__f10
// BB#0: // %entry
ldp x10, x8, [x0]
ldp x11, x9, [x1]
orr x12, x8, x10
orr x10, x9, x11
cmp x12, #0 // =0
cset w11, eq
cmp x10, #0 // =0
cset w10, eq
cbnz x12, .LBB0_2
// BB#1: // %true_bb
orr w12, wzr, #0x4
stp xzr, xzr, [x0, #48]
stp xzr, xzr, [x0, #32]
stp xzr, xzr, [x0, #16]
str w12, [x0, #64]
.LBB0_2: // %after_bb
cbz w10, .LBB0_4
// BB#3: // %after_bb42.thread
orr w8, wzr, #0x4
mov w0, wzr
stp xzr, xzr, [x1, #48]
stp xzr, xzr, [x1, #32]
stp xzr, xzr, [x1, #16]
str w8, [x1, #64]
ret
.LBB0_4: // %after_bb42
movz w10, #0x65
tbnz w11, #0, .LBB0_10
// BB#5:
str x28, [sp, #-32]!
sub x9, sp, #832 // =832
stp x29, x30, [sp, #16]
add x29, sp, #16 // =16
and sp, x9, #0xffffffffffffffe0
movn x13, #0
mov x11, sp
add x12, sp, #416 // =416
.LBB0_6: // %"for f8.s0.v0"
// =>This Inner Loop Header: Depth=1
mul w14, w10, w13
str w13, [x11], #4
add x13, x13, #1 // =1
sub x10, x10, #1 // =1
str w14, [x12], #4
cmp x13, #100 // =100
b.ne .LBB0_6
// BB#7: // %"for f8.s1.r30.x$r.preheader"
ldr w10, [sp, #416]
ldr w11, [sp]
orr w12, wzr, #0x4
add x13, sp, #416 // =416
mov x14, sp
.LBB0_8: // %"for f8.s1.r30.x$r"
// =>This Inner Loop Header: Depth=1
ldr w15, [x13, x12]
ldr w16, [x14, x12]
add x12, x12, #4 // =4
cmp w10, w15
csel w10, w15, w10, lt
csel w11, w16, w11, lt
str w10, [sp, #416]
str w11, [sp]
cmp x12, #404 // =404
b.ne .LBB0_8
// BB#9: // %"consume f8"
str w10, [x8]
str w11, [x9]
sub sp, x29, #16 // =16
ldp x29, x30, [sp, #16]
ldr x28, [sp], #32
.LBB0_10: // %destructor_block
mov w0, wzr
ret
.Lfunc_end0:
.size __f10, .Lfunc_end0-__f10
.section .text.f10,"ax", at progbits
.globl f10
.p2align 2
.type f10, at function
f10: // @f10
// BB#0: // %entry
stp x29, x30, [sp, #-16]!
mov x29, sp
bl __f10
mov w0, wzr
ldp x29, x30, [sp], #16
ret
.Lfunc_end1:
.size f10, .Lfunc_end1-f10
.section .text.f10_argv,"ax", at progbits
.globl f10_argv
.p2align 2
.type f10_argv, at function
f10_argv: // @f10_argv
// BB#0: // %entry
stp x29, x30, [sp, #-16]!
ldp x8, x1, [x0]
mov x29, sp
mov x0, x8
bl f10
mov w0, wzr
ldp x29, x30, [sp], #16
ret
.Lfunc_end2:
.size f10_argv, .Lfunc_end2-f10_argv
.type .Lstr, at object // @str
.section .rodata,"a", at progbits
.p2align 5
.Lstr:
.asciz "f10.0"
.size .Lstr, 6
.type .Lstr.2, at object // @str.2
.p2align 5
.Lstr.2:
.asciz "f10.1"
.size .Lstr.2, 6
.type .L__unnamed_1, at object // @0
.section .data.rel.ro,"aw", at progbits
.p2align 4
.L__unnamed_1:
.xword .Lstr
.word 2 // 0x2
.word 0 // 0x0
.word 0 // 0x0
.word 32 // 0x20
.xword 0
.xword 0
.xword 0
.xword .Lstr.2
.word 2 // 0x2
.word 0 // 0x0
.word 0 // 0x0
.word 32 // 0x20
.xword 0
.xword 0
.xword 0
.size .L__unnamed_1, 96
.type .Lstr.3, at object // @str.3
.section .rodata,"a", at progbits
.p2align 5
.Lstr.3:
.asciz "arm-64-linux-no_asserts-no_runtime"
.size .Lstr.3, 35
.type .Lstr.4, at object // @str.4
.p2align 5
.Lstr.4:
.asciz "f10"
.size .Lstr.4, 4
.type f10_metadata, at object // @f10_metadata
.section .data.rel.ro,"aw", at progbits
.globl f10_metadata
.p2align 4
f10_metadata:
.word 0 // 0x0
.word 2 // 0x2
.xword .L__unnamed_1
.xword .Lstr.3
.xword .Lstr.4
.size f10_metadata, 32
.ident "clang version 3.9.0 (trunk 260979)"
.ident "clang version 3.9.0 (trunk 260979)"
.section ".note.GNU-stack","", at progbits
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160216/7a913361/attachment-0001.html>
More information about the llvm-bugs
mailing list