[llvm] r212324 - [x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
Tobias Grosser
tobias at grosser.es
Mon Jul 7 11:37:49 PDT 2014
On 04/07/2014 10:11, Chandler Carruth wrote:
> Author: chandlerc
> Date: Fri Jul 4 03:11:49 2014
> New Revision: 212324
>
> URL: http://llvm.org/viewvc/llvm-project?rev=212324&view=rev
> Log:
> [x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
> any constant, constant FP, or undef splat and to tolerate any undef
> lanes in a splat, then replace all uses of isSplatVector in X86's
> lowering with it.
>
> This fixes issues where undef lanes in an otherwise splat vector would
> prevent the splat logic from firing. It is a touch more awkward to use
> this interface, but it is much more accurate. Suggestions for better
> interface structuring welcome.
>
> With this fix, the code generated with the widening legalization
> strategy for widen_cast-4.ll is *dramatically* improved as the special
> lowering strategies for a v16i8 SRA kick in even though the high lanes
> are undef.
>
> We also get a slightly different choice for broadcasting an aligned
> memory location, and use vpshufd instead of vbroadcastss. This looks
> like a minor win for pipelining and domain crossing, but a minor loss
> for the number of micro-ops. I suspect its a wash, but folks can easily
> tweak the lowering if they want.
>
> Modified:
> llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/vector-gep.ll
> llvm/trunk/test/CodeGen/X86/widen_cast-4.ll
The commit quoted above caused a test-suite failure on my nightly
test-suite runners.
If you compile out.ll with clang the output before and after this
change will be different. The original .s file was very different
between the two versions. To pinpoint the bug, I replaced all but one of
the undef values with 'zeroinitializer', such that only one undef value
remains and consequently only a single difference shows up in the generated
assembly file.
Without understanding the code, the piece of LLVM-IR we look at is the
following:
%24 = trunc i64 0 to i32
%broadcast.splatinsert18.unr = insertelement <2 x i32> undef, i32
%24, i32 0
%broadcast.splat19.unr = shufflevector <2 x i32>
%broadcast.splatinsert18.unr, <2 x i32> zeroinitializer, <2 x i32>
zeroinitializer
%induction20.unr = add <2 x i32> %broadcast.splat19.unr, <i32 0, i32 1>
%induction21.unr = add <2 x i32> %broadcast.splat19.unr, <i32 2, i32 3>
%25 = getelementptr i32* %1, i64 0
%26 = bitcast i32* %25 to <2 x i32>*
store <2 x i32> %induction20.unr, <2 x i32>* %26, align 4
%.sum98.unr = or i64 0, 2
%27 = getelementptr i32* %1, i64 %.sum98.unr
%28 = bitcast i32* %27 to <2 x i32>*
store <2 x i32> %induction21.unr, <2 x i32>* %28, align 4
%index.next.unr = add i64 0, 4
%29 = icmp eq i64 %index.next.unr, %n.vec
and this is the corresponding assembly file change:
.LBB0_15: # %vector.body.unr
movabsq $4, %rax
- movl $1, %ecx
+ movl $2, %ecx
Unrelated?
movl %ecx, %edx
movd %rdx, %xmm0
- pslldq $8, %xmm0
- movaps .LCPI0_0(%rip), %xmm1
- pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
+ xorps %xmm1, %xmm1
Do we lose some data here?
movq 296(%rsp), %rdx # 8-byte Reload
- movq %xmm0, (%rdx)
- pshufd $8, %xmm1, %xmm0 # xmm0 = xmm1[0,2,0,0]
+ movq %xmm1, (%rdx)
+ pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
Just different instruction selection?
movq %xmm0, 8(%rdx)
addq $0, %rax
movq %rax, 168(%rsp) # 8-byte Spill
Cheers,
Tobias
-------------- next part --------------
; ModuleID = '/home/grosser/Projects/polly/test-suite/SingleSource/Regression/C/sumarraymalloc.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [11 x i8] c"Sum1 = %d\0A\00", align 1
@.str1 = private unnamed_addr constant [11 x i8] c"Sum2 = %d\0A\00", align 1
; Function Attrs: nounwind uwtable
; @main -- reduced reproducer attached to the report above.
;
; Outline of what the IR below does:
;   1. n = (argc < 2) ? 100 : trunc(strtol(argv[1], null, 10)); the buffer %1 is
;      malloc(sext(n) << 2) reinterpreted as i32*.
;   2. Two structurally parallel "fill" regions store trunc(index) into %1[index]:
;        - %polly.loop_header.i*   (scalar) together with %vector.body*   (<2 x i32>)
;        - %polly.loop_header10.i* (scalar) together with %vector.body28* (<2 x i32>)
;      Each region has a vectorized main loop (unrolled by 2, with one peeled
;      iteration) and a scalar loop (unrolled by 4, with up to three peeled
;      iterations).
;   3. %vector.body57 and %vector.body76 each add up 25 <4 x i32> loads taken at
;      fixed offsets 0, 4, ..., 96 from %1, reduce the vector horizontally and
;      print the result with @.str / @.str1.
;   4. The buffer is freed and 0 is returned.
;
; NOTE: %vector.body.unr contains the only remaining `undef` operand in this
; function; every other broadcast inserts into zeroinitializer.
define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
entry:
; Fewer than two arguments: skip parsing and use the default of 100 (see phi in %cond.end).
%cmp = icmp slt i32 %argc, 2
br i1 %cmp, label %cond.end, label %cond.false
cond.false: ; preds = %entry
; Parse argv[1] as a base-10 integer and truncate the i64 result to i32.
%arrayidx = getelementptr inbounds i8** %argv, i64 1
%0 = load i8** %arrayidx, align 8, !tbaa !1
%call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #2
%conv.i = trunc i64 %call.i to i32
br label %cond.end
cond.end: ; preds = %entry, %cond.false
; %cond.reg2mem.0 is the element count n; allocate n * 4 bytes (shl by 2).
%cond.reg2mem.0 = phi i32 [ 100, %entry ], [ %conv.i, %cond.false ]
%conv = sext i32 %cond.reg2mem.0 to i64
%mul = shl nsw i64 %conv, 2
%call1 = tail call noalias i8* @malloc(i64 %mul) #2
%1 = bitcast i8* %call1 to i32*
; n == 0: skip both fill regions and go straight to the reductions.
%2 = icmp eq i32 %cond.reg2mem.0, 0
br i1 %2, label %vector.body57.preheader, label %polly.cond3.i
polly.cond3.i: ; preds = %cond.end
; n < 1 selects the second fill region, otherwise the first one.
; %polly.adjust_ub16.i.pre = zext(n) - 2 is the exit bound used by all scalar loops.
%3 = zext i32 %cond.reg2mem.0 to i64
%4 = icmp slt i32 %cond.reg2mem.0, 1
%polly.adjust_ub16.i.pre = add nsw i64 %3, -2
br i1 %4, label %polly.loop_header10.preheader.i, label %polly.loop_header.i.preheader
polly.loop_header.i.preheader: ; preds = %polly.cond3.i
; %smax = max(zext(n) - 1, 0); if it equals -1 take the scalar path directly,
; otherwise continue to the vectorized path via %overflow.checked.
%5 = zext i32 %cond.reg2mem.0 to i64
%6 = add nsw i64 %5, -1
%7 = icmp sgt i64 %6, 0
%smax = select i1 %7, i64 %6, i64 0
%backedge.overflow = icmp eq i64 %smax, -1
br i1 %backedge.overflow, label %polly.loop_header.i.preheader100, label %overflow.checked
polly.loop_header.i.preheader100: ; preds = %middle.block, %polly.loop_header.i.preheader
; Scalar loop setup. The start index is 0 or the resume value left by the vector loop.
; %12 = max(start, zext(n) - 1) + 1 - start; %xtraiter123 = %12 & 3 selects how
; many peeled iterations run before the 4x-unrolled scalar loop.
%polly.indvar.i.ph = phi i64 [ 0, %polly.loop_header.i.preheader ], [ %resume.val, %middle.block ]
%8 = zext i32 %cond.reg2mem.0 to i64
%9 = add i64 %8, -1
%10 = icmp sgt i64 %polly.indvar.i.ph, %9
%smax122 = select i1 %10, i64 %polly.indvar.i.ph, i64 %9
%11 = add i64 %smax122, 1
%12 = sub i64 %11, %polly.indvar.i.ph
%xtraiter123 = and i64 %12, 3
%lcmp.mod124 = icmp ne i64 %xtraiter123, 0
%lcmp.overflow125 = icmp eq i64 %12, 0
%lcmp.or126 = or i1 %lcmp.overflow125, %lcmp.mod124
br i1 %lcmp.or126, label %unr.cmp140, label %polly.loop_header.i.preheader100.split
unr.cmp140: ; preds = %polly.loop_header.i.preheader100
; Remainder == 1: run only the last peeled iteration.
%un.tmp141 = icmp eq i64 %xtraiter123, 1
br i1 %un.tmp141, label %polly.loop_header.i.unr134, label %unr.cmp132
unr.cmp132: ; preds = %unr.cmp140
; Remainder == 2: run the last two peeled iterations; otherwise run all three.
%un.tmp133 = icmp eq i64 %xtraiter123, 2
br i1 %un.tmp133, label %polly.loop_header.i.unr127, label %polly.loop_header.i.unr
polly.loop_header.i.unr: ; preds = %unr.cmp132
; Peeled scalar iteration 1 of 3: %1[index] = trunc(index); index + 1 feeds the next block.
%p_i.02.i.unr = trunc i64 %polly.indvar.i.ph to i32
%p_arrayidx.i.unr = getelementptr i32* %1, i64 %polly.indvar.i.ph
store i32 %p_i.02.i.unr, i32* %p_arrayidx.i.unr, align 4
%polly.indvar_next.i.unr = add nsw i64 %polly.indvar.i.ph, 1
%polly.loop_cond.i.unr = icmp sgt i64 %polly.indvar.i.ph, %polly.adjust_ub16.i.pre
br label %polly.loop_header.i.unr127
polly.loop_header.i.unr127: ; preds = %unr.cmp132, %polly.loop_header.i.unr
; Peeled scalar iteration 2 of 3.
%polly.indvar.i.unr = phi i64 [ %polly.indvar_next.i.unr, %polly.loop_header.i.unr ], [ %polly.indvar.i.ph, %unr.cmp132 ]
%p_i.02.i.unr128 = trunc i64 %polly.indvar.i.unr to i32
%p_arrayidx.i.unr129 = getelementptr i32* %1, i64 %polly.indvar.i.unr
store i32 %p_i.02.i.unr128, i32* %p_arrayidx.i.unr129, align 4
%polly.indvar_next.i.unr130 = add nsw i64 %polly.indvar.i.unr, 1
%polly.loop_cond.i.unr131 = icmp sgt i64 %polly.indvar.i.unr, %polly.adjust_ub16.i.pre
br label %polly.loop_header.i.unr134
polly.loop_header.i.unr134: ; preds = %unr.cmp140, %polly.loop_header.i.unr127
; Peeled scalar iteration 3 of 3.
%polly.indvar.i.unr135 = phi i64 [ %polly.indvar_next.i.unr130, %polly.loop_header.i.unr127 ], [ %polly.indvar.i.ph, %unr.cmp140 ]
%p_i.02.i.unr136 = trunc i64 %polly.indvar.i.unr135 to i32
%p_arrayidx.i.unr137 = getelementptr i32* %1, i64 %polly.indvar.i.unr135
store i32 %p_i.02.i.unr136, i32* %p_arrayidx.i.unr137, align 4
%polly.indvar_next.i.unr138 = add nsw i64 %polly.indvar.i.unr135, 1
%polly.loop_cond.i.unr139 = icmp sgt i64 %polly.indvar.i.unr135, %polly.adjust_ub16.i.pre
br label %polly.loop_header.i.preheader100.split
polly.loop_header.i.preheader100.split: ; preds = %polly.loop_header.i.unr134, %polly.loop_header.i.preheader100
; If fewer than 4 iterations were required in total, the peeled ones covered them all.
%polly.indvar.i.unr142 = phi i64 [ %polly.indvar.i.ph, %polly.loop_header.i.preheader100 ], [ %polly.indvar_next.i.unr138, %polly.loop_header.i.unr134 ]
%13 = icmp ult i64 %12, 4
br i1 %13, label %polly.cond6.i.loopexit, label %polly.loop_header.i.preheader100.split.split
polly.loop_header.i.preheader100.split.split: ; preds = %polly.loop_header.i.preheader100.split
br label %polly.loop_header.i
overflow.checked: ; preds = %polly.loop_header.i.preheader
; %end.idx = %smax + 1; %n.vec rounds that value down to a multiple of 4
; (and with -4). A zero %n.vec bypasses the vector loop.
%14 = add nsw i64 %smax, 1
%end.idx = add nsw i64 %smax, 1
%n.vec = and i64 %14, -4
%cmp.zero = icmp eq i64 %n.vec, 0
br i1 %cmp.zero, label %middle.block, label %vector.body.preheader
vector.body.preheader: ; preds = %overflow.checked
; %23 = ((((smax + 1) >> 2) * 4 - 4) >> 2) + 1; its low bit decides whether one
; vector iteration is peeled into %vector.body.unr before the 2x-unrolled loop.
%15 = zext i32 %cond.reg2mem.0 to i64
%16 = add i64 %15, -1
%17 = icmp sgt i64 %16, 0
%smax143 = select i1 %17, i64 %16, i64 0
%18 = add i64 %smax143, 1
%19 = lshr i64 %18, 2
%20 = mul i64 %19, 4
%21 = add i64 %20, -4
%22 = lshr i64 %21, 2
%23 = add i64 %22, 1
%xtraiter144 = and i64 %23, 1
%lcmp.mod145 = icmp ne i64 %xtraiter144, 0
%lcmp.overflow146 = icmp eq i64 %23, 0
%lcmp.or147 = or i1 %lcmp.overflow146, %lcmp.mod145
br i1 %lcmp.or147, label %vector.body.unr, label %vector.body.preheader.split
vector.body.unr: ; preds = %vector.body.preheader
; Peeled vector iteration at index 0. This is the block quoted in the report:
; the insertelement below is the only one in the function whose base vector is
; `undef` (lane 1 of %broadcast.splatinsert18.unr is therefore undef). The
; shufflevector mask <0, 0> reads lane 0 only, so the splat is <%24, %24> with
; %24 = trunc(0); the stores write splat + <0, 1> to %1[0..1] and
; splat + <2, 3> to %1[2..3].
%24 = trunc i64 0 to i32
%broadcast.splatinsert18.unr = insertelement <2 x i32> undef, i32 %24, i32 0
%broadcast.splat19.unr = shufflevector <2 x i32> %broadcast.splatinsert18.unr, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction20.unr = add <2 x i32> %broadcast.splat19.unr, <i32 0, i32 1>
%induction21.unr = add <2 x i32> %broadcast.splat19.unr, <i32 2, i32 3>
%25 = getelementptr i32* %1, i64 0
%26 = bitcast i32* %25 to <2 x i32>*
store <2 x i32> %induction20.unr, <2 x i32>* %26, align 4
%.sum98.unr = or i64 0, 2
%27 = getelementptr i32* %1, i64 %.sum98.unr
%28 = bitcast i32* %27 to <2 x i32>*
store <2 x i32> %induction21.unr, <2 x i32>* %28, align 4
%index.next.unr = add i64 0, 4
; %29 is computed but not used by the unconditional branch that follows.
%29 = icmp eq i64 %index.next.unr, %n.vec
br label %vector.body.preheader.split
vector.body.preheader.split: ; preds = %vector.body.unr, %vector.body.preheader
; Start index for the unrolled vector loop: 0, or 4 if the peeled iteration ran.
%index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.unr, %vector.body.unr ]
%30 = icmp ult i64 %23, 2
br i1 %30, label %middle.block.loopexit, label %vector.body.preheader.split.split
vector.body.preheader.split.split: ; preds = %vector.body.preheader.split
br label %vector.body
vector.body: ; preds = %vector.body, %vector.body.preheader.split.split
; Vector loop, unrolled by 2. Each half broadcasts trunc(index) into a <2 x i32>
; (inserting into zeroinitializer here), stores splat + <0, 1> at %1[index] and
; splat + <2, 3> at %1[index | 2], then advances index by 4.
%index = phi i64 [ %index.unr, %vector.body.preheader.split.split ], [ %index.next.1, %vector.body ]
%31 = trunc i64 %index to i32
%broadcast.splatinsert18 = insertelement <2 x i32> zeroinitializer, i32 %31, i32 0
%broadcast.splat19 = shufflevector <2 x i32> %broadcast.splatinsert18, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction20 = add <2 x i32> %broadcast.splat19, <i32 0, i32 1>
%induction21 = add <2 x i32> %broadcast.splat19, <i32 2, i32 3>
%32 = getelementptr i32* %1, i64 %index
%33 = bitcast i32* %32 to <2 x i32>*
store <2 x i32> %induction20, <2 x i32>* %33, align 4
%.sum98 = or i64 %index, 2
%34 = getelementptr i32* %1, i64 %.sum98
%35 = bitcast i32* %34 to <2 x i32>*
store <2 x i32> %induction21, <2 x i32>* %35, align 4
%index.next = add i64 %index, 4
; Second unrolled half, operating on %index.next.
%36 = trunc i64 %index.next to i32
%broadcast.splatinsert18.1 = insertelement <2 x i32> zeroinitializer, i32 %36, i32 0
%broadcast.splat19.1 = shufflevector <2 x i32> %broadcast.splatinsert18.1, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction20.1 = add <2 x i32> %broadcast.splat19.1, <i32 0, i32 1>
%induction21.1 = add <2 x i32> %broadcast.splat19.1, <i32 2, i32 3>
%37 = getelementptr i32* %1, i64 %index.next
%38 = bitcast i32* %37 to <2 x i32>*
store <2 x i32> %induction20.1, <2 x i32>* %38, align 4
%.sum98.1 = or i64 %index.next, 2
%39 = getelementptr i32* %1, i64 %.sum98.1
%40 = bitcast i32* %39 to <2 x i32>*
store <2 x i32> %induction21.1, <2 x i32>* %40, align 4
%index.next.1 = add i64 %index.next, 4
%41 = icmp eq i64 %index.next.1, %n.vec
br i1 %41, label %middle.block.loopexit.unr-lcssa, label %vector.body, !llvm.loop !5
middle.block.loopexit.unr-lcssa: ; preds = %vector.body
br label %middle.block.loopexit
middle.block.loopexit: ; preds = %vector.body.preheader.split, %middle.block.loopexit.unr-lcssa
br label %middle.block
middle.block: ; preds = %middle.block.loopexit, %overflow.checked
; %resume.val is 0 when the vector loop was bypassed, else %n.vec. If it already
; equals %end.idx nothing is left; otherwise the scalar loop resumes from it.
%resume.val = phi i64 [ 0, %overflow.checked ], [ %n.vec, %middle.block.loopexit ]
%cmp.n = icmp eq i64 %end.idx, %resume.val
br i1 %cmp.n, label %polly.cond6.i, label %polly.loop_header.i.preheader100
polly.cond6.i.loopexit.unr-lcssa: ; preds = %polly.loop_header.i
br label %polly.cond6.i.loopexit
polly.cond6.i.loopexit: ; preds = %polly.loop_header.i.preheader100.split, %polly.cond6.i.loopexit.unr-lcssa
br label %polly.cond6.i
polly.cond6.i: ; preds = %polly.cond6.i.loopexit, %middle.block
; After the first fill region: n > -1 goes to the reductions, otherwise the
; second fill region runs as well.
%42 = icmp sgt i32 %cond.reg2mem.0, -1
br i1 %42, label %vector.body57.preheader, label %polly.loop_header10.preheader.i
polly.loop_header10.preheader.i: ; preds = %polly.cond3.i, %polly.cond6.i
; Second fill region: same structure as %polly.loop_header.i.preheader above.
%43 = zext i32 %cond.reg2mem.0 to i64
%44 = add nsw i64 %43, -1
%45 = icmp sgt i64 %44, 0
%smax24 = select i1 %45, i64 %44, i64 0
%backedge.overflow25 = icmp eq i64 %smax24, -1
br i1 %backedge.overflow25, label %polly.loop_header10.i.preheader, label %overflow.checked37
polly.loop_header10.i.preheader: ; preds = %middle.block29, %polly.loop_header10.preheader.i
; Scalar loop setup for the second region; %xtraiter = %50 & 3 selects the
; number of peeled iterations.
%polly.indvar14.i.ph = phi i64 [ 0, %polly.loop_header10.preheader.i ], [ %resume.val38, %middle.block29 ]
%46 = zext i32 %cond.reg2mem.0 to i64
%47 = add i64 %46, -1
%48 = icmp sgt i64 %polly.indvar14.i.ph, %47
%smax102 = select i1 %48, i64 %polly.indvar14.i.ph, i64 %47
%49 = add i64 %smax102, 1
%50 = sub i64 %49, %polly.indvar14.i.ph
%xtraiter = and i64 %50, 3
%lcmp.mod = icmp ne i64 %xtraiter, 0
%lcmp.overflow = icmp eq i64 %50, 0
%lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
br i1 %lcmp.or, label %unr.cmp114, label %polly.loop_header10.i.preheader.split
unr.cmp114: ; preds = %polly.loop_header10.i.preheader
%un.tmp115 = icmp eq i64 %xtraiter, 1
br i1 %un.tmp115, label %polly.loop_header10.i.unr108, label %unr.cmp
unr.cmp: ; preds = %unr.cmp114
%un.tmp = icmp eq i64 %xtraiter, 2
br i1 %un.tmp, label %polly.loop_header10.i.unr103, label %polly.loop_header10.i.unr
polly.loop_header10.i.unr: ; preds = %unr.cmp
; Peeled scalar iteration 1 of 3 (second region).
%p_i.0220.i.unr = trunc i64 %polly.indvar14.i.ph to i32
%p_arrayidx21.i.unr = getelementptr i32* %1, i64 %polly.indvar14.i.ph
store i32 %p_i.0220.i.unr, i32* %p_arrayidx21.i.unr, align 4
%polly.indvar_next15.i.unr = add nsw i64 %polly.indvar14.i.ph, 1
%polly.loop_cond17.i.unr = icmp sgt i64 %polly.indvar14.i.ph, %polly.adjust_ub16.i.pre
br label %polly.loop_header10.i.unr103
polly.loop_header10.i.unr103: ; preds = %unr.cmp, %polly.loop_header10.i.unr
; Peeled scalar iteration 2 of 3 (second region).
%polly.indvar14.i.unr = phi i64 [ %polly.indvar_next15.i.unr, %polly.loop_header10.i.unr ], [ %polly.indvar14.i.ph, %unr.cmp ]
%p_i.0220.i.unr104 = trunc i64 %polly.indvar14.i.unr to i32
%p_arrayidx21.i.unr105 = getelementptr i32* %1, i64 %polly.indvar14.i.unr
store i32 %p_i.0220.i.unr104, i32* %p_arrayidx21.i.unr105, align 4
%polly.indvar_next15.i.unr106 = add nsw i64 %polly.indvar14.i.unr, 1
%polly.loop_cond17.i.unr107 = icmp sgt i64 %polly.indvar14.i.unr, %polly.adjust_ub16.i.pre
br label %polly.loop_header10.i.unr108
polly.loop_header10.i.unr108: ; preds = %unr.cmp114, %polly.loop_header10.i.unr103
; Peeled scalar iteration 3 of 3 (second region).
%polly.indvar14.i.unr109 = phi i64 [ %polly.indvar_next15.i.unr106, %polly.loop_header10.i.unr103 ], [ %polly.indvar14.i.ph, %unr.cmp114 ]
%p_i.0220.i.unr110 = trunc i64 %polly.indvar14.i.unr109 to i32
%p_arrayidx21.i.unr111 = getelementptr i32* %1, i64 %polly.indvar14.i.unr109
store i32 %p_i.0220.i.unr110, i32* %p_arrayidx21.i.unr111, align 4
%polly.indvar_next15.i.unr112 = add nsw i64 %polly.indvar14.i.unr109, 1
%polly.loop_cond17.i.unr113 = icmp sgt i64 %polly.indvar14.i.unr109, %polly.adjust_ub16.i.pre
br label %polly.loop_header10.i.preheader.split
polly.loop_header10.i.preheader.split: ; preds = %polly.loop_header10.i.unr108, %polly.loop_header10.i.preheader
%polly.indvar14.i.unr116 = phi i64 [ %polly.indvar14.i.ph, %polly.loop_header10.i.preheader ], [ %polly.indvar_next15.i.unr112, %polly.loop_header10.i.unr108 ]
%51 = icmp ult i64 %50, 4
br i1 %51, label %vector.body57.preheader.loopexit, label %polly.loop_header10.i.preheader.split.split
polly.loop_header10.i.preheader.split.split: ; preds = %polly.loop_header10.i.preheader.split
br label %polly.loop_header10.i
overflow.checked37: ; preds = %polly.loop_header10.preheader.i
; Vector trip count for the second region: %n.vec34 = (%smax24 + 1) & -4.
%52 = add nsw i64 %smax24, 1
%end.idx32 = add nsw i64 %smax24, 1
%n.vec34 = and i64 %52, -4
%cmp.zero36 = icmp eq i64 %n.vec34, 0
br i1 %cmp.zero36, label %middle.block29, label %vector.body28.preheader
vector.body28.preheader: ; preds = %overflow.checked37
; Same peel decision as in %vector.body.preheader, using %61 and %xtraiter118.
%53 = zext i32 %cond.reg2mem.0 to i64
%54 = add i64 %53, -1
%55 = icmp sgt i64 %54, 0
%smax117 = select i1 %55, i64 %54, i64 0
%56 = add i64 %smax117, 1
%57 = lshr i64 %56, 2
%58 = mul i64 %57, 4
%59 = add i64 %58, -4
%60 = lshr i64 %59, 2
%61 = add i64 %60, 1
%xtraiter118 = and i64 %61, 1
%lcmp.mod119 = icmp ne i64 %xtraiter118, 0
%lcmp.overflow120 = icmp eq i64 %61, 0
%lcmp.or121 = or i1 %lcmp.overflow120, %lcmp.mod119
br i1 %lcmp.or121, label %vector.body28.unr, label %vector.body28.preheader.split
vector.body28.unr: ; preds = %vector.body28.preheader
; Peeled vector iteration at index 0 for the second region. Unlike
; %vector.body.unr, the insertelement base here is zeroinitializer, not undef.
%62 = trunc i64 0 to i32
%broadcast.splatinsert48.unr = insertelement <2 x i32> zeroinitializer, i32 %62, i32 0
%broadcast.splat49.unr = shufflevector <2 x i32> %broadcast.splatinsert48.unr, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction50.unr = add <2 x i32> %broadcast.splat49.unr, <i32 0, i32 1>
%induction51.unr = add <2 x i32> %broadcast.splat49.unr, <i32 2, i32 3>
%63 = getelementptr i32* %1, i64 0
%64 = bitcast i32* %63 to <2 x i32>*
store <2 x i32> %induction50.unr, <2 x i32>* %64, align 4
%.sum99.unr = or i64 0, 2
%65 = getelementptr i32* %1, i64 %.sum99.unr
%66 = bitcast i32* %65 to <2 x i32>*
store <2 x i32> %induction51.unr, <2 x i32>* %66, align 4
%index.next43.unr = add i64 0, 4
; %67 is computed but not used by the unconditional branch that follows.
%67 = icmp eq i64 %index.next43.unr, %n.vec34
br label %vector.body28.preheader.split
vector.body28.preheader.split: ; preds = %vector.body28.unr, %vector.body28.preheader
%index31.unr = phi i64 [ 0, %vector.body28.preheader ], [ %index.next43.unr, %vector.body28.unr ]
%68 = icmp ult i64 %61, 2
br i1 %68, label %middle.block29.loopexit, label %vector.body28.preheader.split.split
vector.body28.preheader.split.split: ; preds = %vector.body28.preheader.split
br label %vector.body28
vector.body28: ; preds = %vector.body28, %vector.body28.preheader.split.split
; Vector loop of the second region, unrolled by 2; same shape as %vector.body.
%index31 = phi i64 [ %index31.unr, %vector.body28.preheader.split.split ], [ %index.next43.1, %vector.body28 ]
%69 = trunc i64 %index31 to i32
%broadcast.splatinsert48 = insertelement <2 x i32> zeroinitializer, i32 %69, i32 0
%broadcast.splat49 = shufflevector <2 x i32> %broadcast.splatinsert48, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction50 = add <2 x i32> %broadcast.splat49, <i32 0, i32 1>
%induction51 = add <2 x i32> %broadcast.splat49, <i32 2, i32 3>
%70 = getelementptr i32* %1, i64 %index31
%71 = bitcast i32* %70 to <2 x i32>*
store <2 x i32> %induction50, <2 x i32>* %71, align 4
%.sum99 = or i64 %index31, 2
%72 = getelementptr i32* %1, i64 %.sum99
%73 = bitcast i32* %72 to <2 x i32>*
store <2 x i32> %induction51, <2 x i32>* %73, align 4
%index.next43 = add i64 %index31, 4
; Second unrolled half, operating on %index.next43.
%74 = trunc i64 %index.next43 to i32
%broadcast.splatinsert48.1 = insertelement <2 x i32> zeroinitializer, i32 %74, i32 0
%broadcast.splat49.1 = shufflevector <2 x i32> %broadcast.splatinsert48.1, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%induction50.1 = add <2 x i32> %broadcast.splat49.1, <i32 0, i32 1>
%induction51.1 = add <2 x i32> %broadcast.splat49.1, <i32 2, i32 3>
%75 = getelementptr i32* %1, i64 %index.next43
%76 = bitcast i32* %75 to <2 x i32>*
store <2 x i32> %induction50.1, <2 x i32>* %76, align 4
%.sum99.1 = or i64 %index.next43, 2
%77 = getelementptr i32* %1, i64 %.sum99.1
%78 = bitcast i32* %77 to <2 x i32>*
store <2 x i32> %induction51.1, <2 x i32>* %78, align 4
%index.next43.1 = add i64 %index.next43, 4
%79 = icmp eq i64 %index.next43.1, %n.vec34
br i1 %79, label %middle.block29.loopexit.unr-lcssa, label %vector.body28, !llvm.loop !8
middle.block29.loopexit.unr-lcssa: ; preds = %vector.body28
br label %middle.block29.loopexit
middle.block29.loopexit: ; preds = %vector.body28.preheader.split, %middle.block29.loopexit.unr-lcssa
br label %middle.block29
middle.block29: ; preds = %middle.block29.loopexit, %overflow.checked37
; Either finish the second region or hand the remaining indices to its scalar loop.
%resume.val38 = phi i64 [ 0, %overflow.checked37 ], [ %n.vec34, %middle.block29.loopexit ]
%cmp.n42 = icmp eq i64 %end.idx32, %resume.val38
br i1 %cmp.n42, label %vector.body57.preheader, label %polly.loop_header10.i.preheader
polly.loop_header.i: ; preds = %polly.loop_header.i, %polly.loop_header.i.preheader100.split.split
; Scalar loop of the first region, unrolled by 4: four consecutive stores of
; trunc(index) to %1[index]; exits once the fourth index exceeds
; %polly.adjust_ub16.i.pre.
%polly.indvar.i = phi i64 [ %polly.indvar.i.unr142, %polly.loop_header.i.preheader100.split.split ], [ %polly.indvar_next.i.3, %polly.loop_header.i ]
%p_i.02.i = trunc i64 %polly.indvar.i to i32
%p_arrayidx.i = getelementptr i32* %1, i64 %polly.indvar.i
store i32 %p_i.02.i, i32* %p_arrayidx.i, align 4
%polly.indvar_next.i = add nsw i64 %polly.indvar.i, 1
%p_i.02.i.1 = trunc i64 %polly.indvar_next.i to i32
%p_arrayidx.i.1 = getelementptr i32* %1, i64 %polly.indvar_next.i
store i32 %p_i.02.i.1, i32* %p_arrayidx.i.1, align 4
%polly.indvar_next.i.1 = add nsw i64 %polly.indvar_next.i, 1
%p_i.02.i.2 = trunc i64 %polly.indvar_next.i.1 to i32
%p_arrayidx.i.2 = getelementptr i32* %1, i64 %polly.indvar_next.i.1
store i32 %p_i.02.i.2, i32* %p_arrayidx.i.2, align 4
%polly.indvar_next.i.2 = add nsw i64 %polly.indvar_next.i.1, 1
%p_i.02.i.3 = trunc i64 %polly.indvar_next.i.2 to i32
%p_arrayidx.i.3 = getelementptr i32* %1, i64 %polly.indvar_next.i.2
store i32 %p_i.02.i.3, i32* %p_arrayidx.i.3, align 4
%polly.indvar_next.i.3 = add nsw i64 %polly.indvar_next.i.2, 1
%polly.loop_cond.i.3 = icmp sgt i64 %polly.indvar_next.i.2, %polly.adjust_ub16.i.pre
br i1 %polly.loop_cond.i.3, label %polly.cond6.i.loopexit.unr-lcssa, label %polly.loop_header.i, !llvm.loop !9
polly.loop_header10.i: ; preds = %polly.loop_header10.i, %polly.loop_header10.i.preheader.split.split
; Scalar loop of the second region, unrolled by 4; same shape as %polly.loop_header.i.
%polly.indvar14.i = phi i64 [ %polly.indvar14.i.unr116, %polly.loop_header10.i.preheader.split.split ], [ %polly.indvar_next15.i.3, %polly.loop_header10.i ]
%p_i.0220.i = trunc i64 %polly.indvar14.i to i32
%p_arrayidx21.i = getelementptr i32* %1, i64 %polly.indvar14.i
store i32 %p_i.0220.i, i32* %p_arrayidx21.i, align 4
%polly.indvar_next15.i = add nsw i64 %polly.indvar14.i, 1
%p_i.0220.i.1 = trunc i64 %polly.indvar_next15.i to i32
%p_arrayidx21.i.1 = getelementptr i32* %1, i64 %polly.indvar_next15.i
store i32 %p_i.0220.i.1, i32* %p_arrayidx21.i.1, align 4
%polly.indvar_next15.i.1 = add nsw i64 %polly.indvar_next15.i, 1
%p_i.0220.i.2 = trunc i64 %polly.indvar_next15.i.1 to i32
%p_arrayidx21.i.2 = getelementptr i32* %1, i64 %polly.indvar_next15.i.1
store i32 %p_i.0220.i.2, i32* %p_arrayidx21.i.2, align 4
%polly.indvar_next15.i.2 = add nsw i64 %polly.indvar_next15.i.1, 1
%p_i.0220.i.3 = trunc i64 %polly.indvar_next15.i.2 to i32
%p_arrayidx21.i.3 = getelementptr i32* %1, i64 %polly.indvar_next15.i.2
store i32 %p_i.0220.i.3, i32* %p_arrayidx21.i.3, align 4
%polly.indvar_next15.i.3 = add nsw i64 %polly.indvar_next15.i.2, 1
%polly.loop_cond17.i.3 = icmp sgt i64 %polly.indvar_next15.i.2, %polly.adjust_ub16.i.pre
br i1 %polly.loop_cond17.i.3, label %vector.body57.preheader.loopexit.unr-lcssa, label %polly.loop_header10.i, !llvm.loop !10
vector.body57.preheader.loopexit.unr-lcssa: ; preds = %polly.loop_header10.i
br label %vector.body57.preheader.loopexit
vector.body57.preheader.loopexit: ; preds = %polly.loop_header10.i.preheader.split, %vector.body57.preheader.loopexit.unr-lcssa
br label %vector.body57.preheader
vector.body57.preheader: ; preds = %vector.body57.preheader.loopexit, %middle.block29, %cond.end, %polly.cond6.i
br label %vector.body57
vector.body57: ; preds = %vector.body57.preheader
; First reduction: accumulate 25 <4 x i32> loads from %1 at element offsets
; 0, 4, ..., 96 into a running vector sum. The offsets are constants and do not
; depend on n.
%80 = bitcast i32* %1 to <4 x i32>*
%wide.load = load <4 x i32>* %80, align 4
%81 = getelementptr i32* %1, i64 4
%82 = bitcast i32* %81 to <4 x i32>*
%wide.load.1 = load <4 x i32>* %82, align 4
%83 = add nsw <4 x i32> %wide.load.1, %wide.load
%84 = getelementptr i32* %1, i64 8
%85 = bitcast i32* %84 to <4 x i32>*
%wide.load.2 = load <4 x i32>* %85, align 4
%86 = add nsw <4 x i32> %wide.load.2, %83
%87 = getelementptr i32* %1, i64 12
%88 = bitcast i32* %87 to <4 x i32>*
%wide.load.3 = load <4 x i32>* %88, align 4
%89 = add nsw <4 x i32> %wide.load.3, %86
%90 = getelementptr i32* %1, i64 16
%91 = bitcast i32* %90 to <4 x i32>*
%wide.load.4 = load <4 x i32>* %91, align 4
%92 = add nsw <4 x i32> %wide.load.4, %89
%93 = getelementptr i32* %1, i64 20
%94 = bitcast i32* %93 to <4 x i32>*
%wide.load.5 = load <4 x i32>* %94, align 4
%95 = add nsw <4 x i32> %wide.load.5, %92
%96 = getelementptr i32* %1, i64 24
%97 = bitcast i32* %96 to <4 x i32>*
%wide.load.6 = load <4 x i32>* %97, align 4
%98 = add nsw <4 x i32> %wide.load.6, %95
%99 = getelementptr i32* %1, i64 28
%100 = bitcast i32* %99 to <4 x i32>*
%wide.load.7 = load <4 x i32>* %100, align 4
%101 = add nsw <4 x i32> %wide.load.7, %98
%102 = getelementptr i32* %1, i64 32
%103 = bitcast i32* %102 to <4 x i32>*
%wide.load.8 = load <4 x i32>* %103, align 4
%104 = add nsw <4 x i32> %wide.load.8, %101
%105 = getelementptr i32* %1, i64 36
%106 = bitcast i32* %105 to <4 x i32>*
%wide.load.9 = load <4 x i32>* %106, align 4
%107 = add nsw <4 x i32> %wide.load.9, %104
%108 = getelementptr i32* %1, i64 40
%109 = bitcast i32* %108 to <4 x i32>*
%wide.load.10 = load <4 x i32>* %109, align 4
%110 = add nsw <4 x i32> %wide.load.10, %107
%111 = getelementptr i32* %1, i64 44
%112 = bitcast i32* %111 to <4 x i32>*
%wide.load.11 = load <4 x i32>* %112, align 4
%113 = add nsw <4 x i32> %wide.load.11, %110
%114 = getelementptr i32* %1, i64 48
%115 = bitcast i32* %114 to <4 x i32>*
%wide.load.12 = load <4 x i32>* %115, align 4
%116 = add nsw <4 x i32> %wide.load.12, %113
%117 = getelementptr i32* %1, i64 52
%118 = bitcast i32* %117 to <4 x i32>*
%wide.load.13 = load <4 x i32>* %118, align 4
%119 = add nsw <4 x i32> %wide.load.13, %116
%120 = getelementptr i32* %1, i64 56
%121 = bitcast i32* %120 to <4 x i32>*
%wide.load.14 = load <4 x i32>* %121, align 4
%122 = add nsw <4 x i32> %wide.load.14, %119
%123 = getelementptr i32* %1, i64 60
%124 = bitcast i32* %123 to <4 x i32>*
%wide.load.15 = load <4 x i32>* %124, align 4
%125 = add nsw <4 x i32> %wide.load.15, %122
%126 = getelementptr i32* %1, i64 64
%127 = bitcast i32* %126 to <4 x i32>*
%wide.load.16 = load <4 x i32>* %127, align 4
%128 = add nsw <4 x i32> %wide.load.16, %125
%129 = getelementptr i32* %1, i64 68
%130 = bitcast i32* %129 to <4 x i32>*
%wide.load.17 = load <4 x i32>* %130, align 4
%131 = add nsw <4 x i32> %wide.load.17, %128
%132 = getelementptr i32* %1, i64 72
%133 = bitcast i32* %132 to <4 x i32>*
%wide.load.18 = load <4 x i32>* %133, align 4
%134 = add nsw <4 x i32> %wide.load.18, %131
%135 = getelementptr i32* %1, i64 76
%136 = bitcast i32* %135 to <4 x i32>*
%wide.load.19 = load <4 x i32>* %136, align 4
%137 = add nsw <4 x i32> %wide.load.19, %134
%138 = getelementptr i32* %1, i64 80
%139 = bitcast i32* %138 to <4 x i32>*
%wide.load.20 = load <4 x i32>* %139, align 4
%140 = add nsw <4 x i32> %wide.load.20, %137
%141 = getelementptr i32* %1, i64 84
%142 = bitcast i32* %141 to <4 x i32>*
%wide.load.21 = load <4 x i32>* %142, align 4
%143 = add nsw <4 x i32> %wide.load.21, %140
%144 = getelementptr i32* %1, i64 88
%145 = bitcast i32* %144 to <4 x i32>*
%wide.load.22 = load <4 x i32>* %145, align 4
%146 = add nsw <4 x i32> %wide.load.22, %143
%147 = getelementptr i32* %1, i64 92
%148 = bitcast i32* %147 to <4 x i32>*
%wide.load.23 = load <4 x i32>* %148, align 4
%149 = add nsw <4 x i32> %wide.load.23, %146
%150 = getelementptr i32* %1, i64 96
%151 = bitcast i32* %150 to <4 x i32>*
%wide.load.24 = load <4 x i32>* %151, align 4
%152 = add nsw <4 x i32> %wide.load.24, %149
; Horizontal reduction: add lanes 2,3 onto lanes 0,1, then lane 1 onto lane 0;
; only lane 0 of %bin.rdx72 is extracted and printed with @.str.
%rdx.shuf = shufflevector <4 x i32> %152, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 0, i32 0>
%bin.rdx = add <4 x i32> %152, %rdx.shuf
%rdx.shuf71 = shufflevector <4 x i32> %bin.rdx, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
%bin.rdx72 = add <4 x i32> %bin.rdx, %rdx.shuf71
%153 = extractelement <4 x i32> %bin.rdx72, i32 0
%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %153) #2
br label %vector.body76
vector.body76: ; preds = %vector.body57
; Second reduction: the same 25-load sum over %1, printed with @.str1.
%154 = bitcast i32* %1 to <4 x i32>*
%wide.load91 = load <4 x i32>* %154, align 4
%155 = getelementptr i32* %1, i64 4
%156 = bitcast i32* %155 to <4 x i32>*
%wide.load91.1 = load <4 x i32>* %156, align 4
%157 = add nsw <4 x i32> %wide.load91.1, %wide.load91
%158 = getelementptr i32* %1, i64 8
%159 = bitcast i32* %158 to <4 x i32>*
%wide.load91.2 = load <4 x i32>* %159, align 4
%160 = add nsw <4 x i32> %wide.load91.2, %157
%161 = getelementptr i32* %1, i64 12
%162 = bitcast i32* %161 to <4 x i32>*
%wide.load91.3 = load <4 x i32>* %162, align 4
%163 = add nsw <4 x i32> %wide.load91.3, %160
%164 = getelementptr i32* %1, i64 16
%165 = bitcast i32* %164 to <4 x i32>*
%wide.load91.4 = load <4 x i32>* %165, align 4
%166 = add nsw <4 x i32> %wide.load91.4, %163
%167 = getelementptr i32* %1, i64 20
%168 = bitcast i32* %167 to <4 x i32>*
%wide.load91.5 = load <4 x i32>* %168, align 4
%169 = add nsw <4 x i32> %wide.load91.5, %166
%170 = getelementptr i32* %1, i64 24
%171 = bitcast i32* %170 to <4 x i32>*
%wide.load91.6 = load <4 x i32>* %171, align 4
%172 = add nsw <4 x i32> %wide.load91.6, %169
%173 = getelementptr i32* %1, i64 28
%174 = bitcast i32* %173 to <4 x i32>*
%wide.load91.7 = load <4 x i32>* %174, align 4
%175 = add nsw <4 x i32> %wide.load91.7, %172
%176 = getelementptr i32* %1, i64 32
%177 = bitcast i32* %176 to <4 x i32>*
%wide.load91.8 = load <4 x i32>* %177, align 4
%178 = add nsw <4 x i32> %wide.load91.8, %175
%179 = getelementptr i32* %1, i64 36
%180 = bitcast i32* %179 to <4 x i32>*
%wide.load91.9 = load <4 x i32>* %180, align 4
%181 = add nsw <4 x i32> %wide.load91.9, %178
%182 = getelementptr i32* %1, i64 40
%183 = bitcast i32* %182 to <4 x i32>*
%wide.load91.10 = load <4 x i32>* %183, align 4
%184 = add nsw <4 x i32> %wide.load91.10, %181
%185 = getelementptr i32* %1, i64 44
%186 = bitcast i32* %185 to <4 x i32>*
%wide.load91.11 = load <4 x i32>* %186, align 4
%187 = add nsw <4 x i32> %wide.load91.11, %184
%188 = getelementptr i32* %1, i64 48
%189 = bitcast i32* %188 to <4 x i32>*
%wide.load91.12 = load <4 x i32>* %189, align 4
%190 = add nsw <4 x i32> %wide.load91.12, %187
%191 = getelementptr i32* %1, i64 52
%192 = bitcast i32* %191 to <4 x i32>*
%wide.load91.13 = load <4 x i32>* %192, align 4
%193 = add nsw <4 x i32> %wide.load91.13, %190
%194 = getelementptr i32* %1, i64 56
%195 = bitcast i32* %194 to <4 x i32>*
%wide.load91.14 = load <4 x i32>* %195, align 4
%196 = add nsw <4 x i32> %wide.load91.14, %193
%197 = getelementptr i32* %1, i64 60
%198 = bitcast i32* %197 to <4 x i32>*
%wide.load91.15 = load <4 x i32>* %198, align 4
%199 = add nsw <4 x i32> %wide.load91.15, %196
%200 = getelementptr i32* %1, i64 64
%201 = bitcast i32* %200 to <4 x i32>*
%wide.load91.16 = load <4 x i32>* %201, align 4
%202 = add nsw <4 x i32> %wide.load91.16, %199
%203 = getelementptr i32* %1, i64 68
%204 = bitcast i32* %203 to <4 x i32>*
%wide.load91.17 = load <4 x i32>* %204, align 4
%205 = add nsw <4 x i32> %wide.load91.17, %202
%206 = getelementptr i32* %1, i64 72
%207 = bitcast i32* %206 to <4 x i32>*
%wide.load91.18 = load <4 x i32>* %207, align 4
%208 = add nsw <4 x i32> %wide.load91.18, %205
%209 = getelementptr i32* %1, i64 76
%210 = bitcast i32* %209 to <4 x i32>*
%wide.load91.19 = load <4 x i32>* %210, align 4
%211 = add nsw <4 x i32> %wide.load91.19, %208
%212 = getelementptr i32* %1, i64 80
%213 = bitcast i32* %212 to <4 x i32>*
%wide.load91.20 = load <4 x i32>* %213, align 4
%214 = add nsw <4 x i32> %wide.load91.20, %211
%215 = getelementptr i32* %1, i64 84
%216 = bitcast i32* %215 to <4 x i32>*
%wide.load91.21 = load <4 x i32>* %216, align 4
%217 = add nsw <4 x i32> %wide.load91.21, %214
%218 = getelementptr i32* %1, i64 88
%219 = bitcast i32* %218 to <4 x i32>*
%wide.load91.22 = load <4 x i32>* %219, align 4
%220 = add nsw <4 x i32> %wide.load91.22, %217
%221 = getelementptr i32* %1, i64 92
%222 = bitcast i32* %221 to <4 x i32>*
%wide.load91.23 = load <4 x i32>* %222, align 4
%223 = add nsw <4 x i32> %wide.load91.23, %220
%224 = getelementptr i32* %1, i64 96
%225 = bitcast i32* %224 to <4 x i32>*
%wide.load91.24 = load <4 x i32>* %225, align 4
%226 = add nsw <4 x i32> %wide.load91.24, %223
; Horizontal reduction, same shuffle pattern as in %vector.body57.
%rdx.shuf93 = shufflevector <4 x i32> %226, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 0, i32 0>
%bin.rdx94 = add <4 x i32> %226, %rdx.shuf93
%rdx.shuf95 = shufflevector <4 x i32> %bin.rdx94, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
%bin.rdx96 = add <4 x i32> %bin.rdx94, %rdx.shuf95
%227 = extractelement <4 x i32> %bin.rdx96, i32 0
%call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str1, i64 0, i64 0), i32 %227) #2
; Release the malloc'ed buffer and return 0.
tail call void @free(i8* %call1) #2
ret i32 0
}
; External C library routines used by this module. The assembly listings
; attached to this message contain a callq to each of malloc, printf, free
; and strtol.
; Function Attrs: nounwind
declare noalias i8* @malloc(i64) #1
; Function Attrs: nounwind
; Variadic: the first parameter is the format string.
declare i32 @printf(i8* nocapture readonly, ...) #1
; Function Attrs: nounwind
declare void @free(i8* nocapture) #1
; Function Attrs: nounwind
declare i64 @strtol(i8* readonly, i8** nocapture, i32) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
; Module metadata (pre-3.6 "metadata !{...}" syntax).
; !0 is the producer string referenced by !llvm.ident.
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.5.0 "}
; !1-!4: TBAA nodes ("any pointer" -> "omnipotent char" -> "Simple C/C++ TBAA").
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
; !5, !8, !9, !10: self-referential loop IDs, each carrying the hints in !6 and !7.
; NOTE(review): width 1 / unroll 1 presumably tell the loop vectorizer to leave
; these loops alone; the loops they attach to are outside this excerpt.
!5 = metadata !{metadata !5, metadata !6, metadata !7}
!6 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
!7 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
!8 = metadata !{metadata !8, metadata !6, metadata !7}
!9 = metadata !{metadata !9, metadata !6, metadata !7}
!10 = metadata !{metadata !10, metadata !6, metadata !7}
-------------- next part --------------
# Constant pool for main: one 16-byte-aligned vector of two quadwords {2, 3}.
# It is loaded with movaps (which needs the 16-byte alignment) by the vector
# initialisation blocks of main.
.text
.file "out.ll"
# The section type is spelled "@progbits"; the list archive had rewritten the
# '@' as " at ", which does not assemble.
.section .rodata.cst16,"aM",@progbits,16
.align 16
.LCPI0_0:
.quad 2 # 0x2
.quad 3 # 0x3
.text
# main entry (AT&T syntax). On entry %edi and %rsi hold the first two integer
# arguments (argc, argv under the SysV AMD64 convention).
# Reserves a 328-byte frame; every value is spilled to and reloaded from that
# frame between basic blocks.
.globl main
.align 16, 0x90
# The symbol type is spelled "@function"; the list archive had rewritten the
# '@' as " at ", which does not assemble.
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
subq $328, %rsp # imm = 0x148
.Ltmp0:
.cfi_def_cfa_offset 336
# Default element count is 100; it is kept when the first argument is < 2,
# otherwise %cond.false replaces it with the strtol result.
movl $100, %eax
cmpl $2, %edi
movq %rsi, 320(%rsp) # 8-byte Spill
movl %eax, 316(%rsp) # 4-byte Spill
jl .LBB0_2
# BB#1: # %cond.false
movq 320(%rsp), %rax # 8-byte Reload
movq 8(%rax), %rdi
xorl %ecx, %ecx
movl %ecx, %esi
movl $10, %edx
callq strtol
movl %eax, %ecx
movl %ecx, 316(%rsp) # 4-byte Spill
.LBB0_2: # %cond.end
movl 316(%rsp), %eax # 4-byte Reload
movslq %eax, %rcx
shlq $2, %rcx
movq %rcx, %rdi
movl %eax, 312(%rsp) # 4-byte Spill
callq malloc
movq %rax, %rcx
movl 312(%rsp), %edx # 4-byte Reload
cmpl $0, %edx
movq %rax, 304(%rsp) # 8-byte Spill
movq %rcx, 296(%rsp) # 8-byte Spill
je .LBB0_47
# BB#3: # %polly.cond3.i
movl 312(%rsp), %eax # 4-byte Reload
movl %eax, %ecx
movl %ecx, %edx
addq $-2, %rdx
cmpl $1, %eax
movq %rdx, 288(%rsp) # 8-byte Spill
jl .LBB0_25
# BB#4: # %polly.loop_header.i.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
cmpq $-1, %rdi
movq %rdi, 280(%rsp) # 8-byte Spill
movq %rax, 272(%rsp) # 8-byte Spill
jne .LBB0_13
.LBB0_5: # %polly.loop_header.i.preheader100
movq 272(%rsp), %rax # 8-byte Reload
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq %rsi, %rax
cmovgq %rax, %rsi
addq $1, %rsi
subq %rax, %rsi
movq %rsi, %rdi
andq $3, %rdi
cmpq $0, %rdi
setne %r8b
cmpq $0, %rsi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rax, %r10
movq %rax, 264(%rsp) # 8-byte Spill
movq %rsi, 256(%rsp) # 8-byte Spill
movq %rdi, 248(%rsp) # 8-byte Spill
movq %r10, 240(%rsp) # 8-byte Spill
jne .LBB0_6
jmp .LBB0_11
.LBB0_6: # %unr.cmp140
movq 248(%rsp), %rax # 8-byte Reload
cmpq $1, %rax
movq 264(%rsp), %rcx # 8-byte Reload
movq %rcx, 232(%rsp) # 8-byte Spill
je .LBB0_10
# BB#7: # %unr.cmp132
movq 248(%rsp), %rax # 8-byte Reload
cmpq $2, %rax
movq 264(%rsp), %rcx # 8-byte Reload
movq %rcx, 224(%rsp) # 8-byte Spill
je .LBB0_9
# BB#8: # %polly.loop_header.i.unr
movq 264(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 224(%rsp) # 8-byte Spill
.LBB0_9: # %polly.loop_header.i.unr127
movq 224(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 232(%rsp) # 8-byte Spill
.LBB0_10: # %polly.loop_header.i.unr134
movq 232(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 240(%rsp) # 8-byte Spill
.LBB0_11: # %polly.loop_header.i.preheader100.split
movq 240(%rsp), %rax # 8-byte Reload
movq 256(%rsp), %rcx # 8-byte Reload
cmpq $4, %rcx
movq %rax, 216(%rsp) # 8-byte Spill
jb .LBB0_23
# BB#12: # %polly.loop_header.i.preheader100.split.split
movq 216(%rsp), %rax # 8-byte Reload
movq %rax, 208(%rsp) # 8-byte Spill
jmp .LBB0_43
.LBB0_13: # %overflow.checked
movabsq $0, %rax
movq 280(%rsp), %rcx # 8-byte Reload
addq $1, %rcx
movq 280(%rsp), %rdx # 8-byte Reload
addq $1, %rdx
andq $-4, %rcx
cmpq $0, %rcx
movq %rcx, 200(%rsp) # 8-byte Spill
movq %rdx, 192(%rsp) # 8-byte Spill
movq %rax, 184(%rsp) # 8-byte Spill
je .LBB0_21
# BB#14: # %vector.body.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
addq $1, %rdi
shrq $2, %rdi
shlq $2, %rdi
addq $-4, %rdi
shrq $2, %rdi
addq $1, %rdi
movq %rdi, %rsi
andq $1, %rsi
cmpq $0, %rsi
setne %r8b
cmpq $0, %rdi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rdi, 176(%rsp) # 8-byte Spill
movq %rax, 168(%rsp) # 8-byte Spill
jne .LBB0_15
jmp .LBB0_16
# Writes the first four 32-bit elements of the buffer at 296(%rsp) (the malloc
# result) and sets the next vector index to 4. Reached from
# %vector.body.preheader when the computed iteration count is odd or zero.
# In this listing the four values stored are 0, 1, 2, 3.
.LBB0_15: # %vector.body.unr
movabsq $4, %rax
movl $1, %ecx
movl %ecx, %edx
# xmm0 = {1, 0} as two quadwords, then shifted up 8 bytes -> {0, 1}.
movd %rdx, %xmm0
pslldq $8, %xmm0
# xmm1 = {2, 3} from the constant pool.
movaps .LCPI0_0(%rip), %xmm1
# Pack the low dword of each quadword into the low 64 bits: dwords {0, 1}.
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
# buf[0] = 0, buf[1] = 1
movq %xmm0, (%rdx)
# Same packing for {2, 3}: buf[2] = 2, buf[3] = 3
pshufd $8, %xmm1, %xmm0 # xmm0 = xmm1[0,2,0,0]
movq %xmm0, 8(%rdx)
addq $0, %rax
# Next index (4) is handed to %vector.body.preheader.split through 168(%rsp).
movq %rax, 168(%rsp) # 8-byte Spill
.LBB0_16: # %vector.body.preheader.split
movq 168(%rsp), %rax # 8-byte Reload
movq 176(%rsp), %rcx # 8-byte Reload
cmpq $2, %rcx
movq %rax, 160(%rsp) # 8-byte Spill
jb .LBB0_20
# BB#17: # %vector.body.preheader.split.split
movq 160(%rsp), %rax # 8-byte Reload
movq %rax, 152(%rsp) # 8-byte Spill
jmp .LBB0_18
# Vector initialisation loop, two 4-element steps per trip: stores
# buf[i+k] = i+k for k = 0..7, where i is the index reloaded from 152(%rsp)
# and buf is the pointer at 296(%rsp). Exits when i+8 equals the bound
# at 200(%rsp).
# i is a multiple of 4 on the paths visible here (it starts at 0 or 4 and
# advances by 8), so "orq $2" yields i+2.
.LBB0_18: # %vector.body
# =>This Inner Loop Header: Depth=1
movq 152(%rsp), %rax # 8-byte Reload
# xmm0 = {i, i} as two quadwords.
movd %rax, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
# xmm1 = {0, 1} as two quadwords.
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm1
pslldq $8, %xmm1
# xmm2 = {i, i+1}; xmm0 = {i+2, i+3} (xmm3 = {2, 3} from the constant pool).
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
movaps .LCPI0_0(%rip), %xmm3
paddq %xmm3, %xmm0
# Truncate each quadword to its low dword and store two dwords at a time.
pshufd $8, %xmm2, %xmm2 # xmm2 = xmm2[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm2, (%rdx,%rax,4)
movq %rax, %rsi
orq $2, %rsi
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rsi,4)
# Second step: same pattern for index i+4.
movq %rax, %rsi
addq $4, %rsi
movd %rsi, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm1 # xmm1 = xmm2[0,2,0,0]
movq %xmm1, 16(%rdx,%rax,4)
movq %rsi, %rax
orq $2, %rax
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rax,4)
# i += 8; loop until it reaches the bound spilled at 200(%rsp).
addq $4, %rsi
movq 200(%rsp), %rax # 8-byte Reload
cmpq %rax, %rsi
movq %rsi, 152(%rsp) # 8-byte Spill
jne .LBB0_18
# BB#19: # %middle.block.loopexit.unr-lcssa
jmp .LBB0_20
.LBB0_20: # %middle.block.loopexit
movq 200(%rsp), %rax # 8-byte Reload
movq %rax, 184(%rsp) # 8-byte Spill
jmp .LBB0_21
.LBB0_21: # %middle.block
movq 184(%rsp), %rax # 8-byte Reload
movq 192(%rsp), %rcx # 8-byte Reload
cmpq %rax, %rcx
movq %rax, 272(%rsp) # 8-byte Spill
je .LBB0_24
jmp .LBB0_5
.LBB0_22: # %polly.cond6.i.loopexit.unr-lcssa
jmp .LBB0_23
.LBB0_23: # %polly.cond6.i.loopexit
jmp .LBB0_24
.LBB0_24: # %polly.cond6.i
movl 312(%rsp), %eax # 4-byte Reload
cmpl $-1, %eax
jg .LBB0_47
.LBB0_25: # %polly.loop_header10.preheader.i
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
cmpq $-1, %rdi
movq %rdi, 144(%rsp) # 8-byte Spill
movq %rax, 136(%rsp) # 8-byte Spill
jne .LBB0_34
.LBB0_26: # %polly.loop_header10.i.preheader
movq 136(%rsp), %rax # 8-byte Reload
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq %rsi, %rax
cmovgq %rax, %rsi
addq $1, %rsi
subq %rax, %rsi
movq %rsi, %rdi
andq $3, %rdi
cmpq $0, %rdi
setne %r8b
cmpq $0, %rsi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rax, %r10
movq %rax, 128(%rsp) # 8-byte Spill
movq %rsi, 120(%rsp) # 8-byte Spill
movq %rdi, 112(%rsp) # 8-byte Spill
movq %r10, 104(%rsp) # 8-byte Spill
jne .LBB0_27
jmp .LBB0_32
.LBB0_27: # %unr.cmp114
movq 112(%rsp), %rax # 8-byte Reload
cmpq $1, %rax
movq 128(%rsp), %rcx # 8-byte Reload
movq %rcx, 96(%rsp) # 8-byte Spill
je .LBB0_31
# BB#28: # %unr.cmp
movq 112(%rsp), %rax # 8-byte Reload
cmpq $2, %rax
movq 128(%rsp), %rcx # 8-byte Reload
movq %rcx, 88(%rsp) # 8-byte Spill
je .LBB0_30
# BB#29: # %polly.loop_header10.i.unr
movq 128(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 88(%rsp) # 8-byte Spill
.LBB0_30: # %polly.loop_header10.i.unr103
movq 88(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 96(%rsp) # 8-byte Spill
.LBB0_31: # %polly.loop_header10.i.unr108
movq 96(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 104(%rsp) # 8-byte Spill
.LBB0_32: # %polly.loop_header10.i.preheader.split
movq 104(%rsp), %rax # 8-byte Reload
movq 120(%rsp), %rcx # 8-byte Reload
cmpq $4, %rcx
movq %rax, 80(%rsp) # 8-byte Spill
jb .LBB0_46
# BB#33: # %polly.loop_header10.i.preheader.split.split
movq 80(%rsp), %rax # 8-byte Reload
movq %rax, 72(%rsp) # 8-byte Spill
jmp .LBB0_44
.LBB0_34: # %overflow.checked37
movabsq $0, %rax
movq 144(%rsp), %rcx # 8-byte Reload
addq $1, %rcx
movq 144(%rsp), %rdx # 8-byte Reload
addq $1, %rdx
andq $-4, %rcx
cmpq $0, %rcx
movq %rcx, 64(%rsp) # 8-byte Spill
movq %rdx, 56(%rsp) # 8-byte Spill
movq %rax, 48(%rsp) # 8-byte Spill
je .LBB0_42
# BB#35: # %vector.body28.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
addq $1, %rdi
shrq $2, %rdi
shlq $2, %rdi
addq $-4, %rdi
shrq $2, %rdi
addq $1, %rdi
movq %rdi, %rsi
andq $1, %rsi
cmpq $0, %rsi
setne %r8b
cmpq $0, %rdi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rdi, 40(%rsp) # 8-byte Spill
movq %rax, 32(%rsp) # 8-byte Spill
jne .LBB0_36
jmp .LBB0_37
.LBB0_36: # %vector.body28.unr
movabsq $4, %rax
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm0
pslldq $8, %xmm0
movaps .LCPI0_0(%rip), %xmm1
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm0, (%rdx)
pshufd $8, %xmm1, %xmm0 # xmm0 = xmm1[0,2,0,0]
movq %xmm0, 8(%rdx)
addq $0, %rax
movq %rax, 32(%rsp) # 8-byte Spill
.LBB0_37: # %vector.body28.preheader.split
movq 32(%rsp), %rax # 8-byte Reload
movq 40(%rsp), %rcx # 8-byte Reload
cmpq $2, %rcx
movq %rax, 24(%rsp) # 8-byte Spill
jb .LBB0_41
# BB#38: # %vector.body28.preheader.split.split
movq 24(%rsp), %rax # 8-byte Reload
movq %rax, 16(%rsp) # 8-byte Spill
jmp .LBB0_39
.LBB0_39: # %vector.body28
# =>This Inner Loop Header: Depth=1
movq 16(%rsp), %rax # 8-byte Reload
movd %rax, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm1
pslldq $8, %xmm1
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
movaps .LCPI0_0(%rip), %xmm3
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm2 # xmm2 = xmm2[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm2, (%rdx,%rax,4)
movq %rax, %rsi
orq $2, %rsi
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rsi,4)
movq %rax, %rsi
addq $4, %rsi
movd %rsi, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm1 # xmm1 = xmm2[0,2,0,0]
movq %xmm1, 16(%rdx,%rax,4)
movq %rsi, %rax
orq $2, %rax
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rax,4)
addq $4, %rsi
movq 64(%rsp), %rax # 8-byte Reload
cmpq %rax, %rsi
movq %rsi, 16(%rsp) # 8-byte Spill
jne .LBB0_39
# BB#40: # %middle.block29.loopexit.unr-lcssa
jmp .LBB0_41
.LBB0_41: # %middle.block29.loopexit
movq 64(%rsp), %rax # 8-byte Reload
movq %rax, 48(%rsp) # 8-byte Spill
jmp .LBB0_42
.LBB0_42: # %middle.block29
movq 48(%rsp), %rax # 8-byte Reload
movq 56(%rsp), %rcx # 8-byte Reload
cmpq %rax, %rcx
movq %rax, 136(%rsp) # 8-byte Spill
je .LBB0_47
jmp .LBB0_26
.LBB0_43: # %polly.loop_header.i
# =>This Inner Loop Header: Depth=1
movq 208(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
movq %rax, %rsi
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 4(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 8(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 12(%rdx,%rax,4)
movq %rsi, %rax
addq $1, %rax
movq 288(%rsp), %rdi # 8-byte Reload
cmpq %rdi, %rsi
movq %rax, 208(%rsp) # 8-byte Spill
jg .LBB0_22
jmp .LBB0_43
.LBB0_44: # %polly.loop_header10.i
# =>This Inner Loop Header: Depth=1
movq 72(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
movq %rax, %rsi
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 4(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 8(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 12(%rdx,%rax,4)
movq %rsi, %rax
addq $1, %rax
movq 288(%rsp), %rdi # 8-byte Reload
cmpq %rdi, %rsi
movq %rax, 72(%rsp) # 8-byte Spill
jle .LBB0_44
# BB#45: # %vector.body57.preheader.loopexit.unr-lcssa
jmp .LBB0_46
.LBB0_46: # %vector.body57.preheader.loopexit
jmp .LBB0_47
.LBB0_47: # %vector.body57.preheader
jmp .LBB0_48
# First reduction: sums 100 consecutive 32-bit elements of the buffer at
# 296(%rsp) (25 unaligned 16-byte loads at offsets 0..384, accumulated with
# paddd), reduces the four lanes to one value, and passes it to printf with
# the format string .L.str.
# NOTE(review): the loads always cover bytes 0..399, while the buffer was
# allocated as 4 * count bytes in %cond.end; confirm the test is only run
# with count >= 100.
.LBB0_48: # %vector.body57
movq 296(%rsp), %rax # 8-byte Reload
movups (%rax), %xmm0
movups 16(%rax), %xmm1
movups 32(%rax), %xmm2
movups 48(%rax), %xmm3
paddd %xmm0, %xmm1
paddd %xmm1, %xmm2
paddd %xmm2, %xmm3
# From here the running sum alternates between xmm0 and xmm1.
movups 64(%rax), %xmm0
paddd %xmm3, %xmm0
movups 80(%rax), %xmm1
paddd %xmm0, %xmm1
movups 96(%rax), %xmm0
paddd %xmm1, %xmm0
movups 112(%rax), %xmm1
paddd %xmm0, %xmm1
movups 128(%rax), %xmm0
paddd %xmm1, %xmm0
movups 144(%rax), %xmm1
paddd %xmm0, %xmm1
movups 160(%rax), %xmm0
paddd %xmm1, %xmm0
movups 176(%rax), %xmm1
paddd %xmm0, %xmm1
movups 192(%rax), %xmm0
paddd %xmm1, %xmm0
movups 208(%rax), %xmm1
paddd %xmm0, %xmm1
movups 224(%rax), %xmm0
paddd %xmm1, %xmm0
movups 240(%rax), %xmm1
paddd %xmm0, %xmm1
movups 256(%rax), %xmm0
paddd %xmm1, %xmm0
movups 272(%rax), %xmm1
paddd %xmm0, %xmm1
movups 288(%rax), %xmm0
paddd %xmm1, %xmm0
movups 304(%rax), %xmm1
paddd %xmm0, %xmm1
movups 320(%rax), %xmm0
paddd %xmm1, %xmm0
movups 336(%rax), %xmm1
paddd %xmm0, %xmm1
movups 352(%rax), %xmm0
paddd %xmm1, %xmm0
movups 368(%rax), %xmm1
paddd %xmm0, %xmm1
movups 384(%rax), %xmm0
paddd %xmm1, %xmm0
# Horizontal add: fold lanes 2,3 onto lanes 0,1, then lane 1 onto lane 0.
pshufd $14, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,0]
paddd %xmm1, %xmm0
pshufd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0,0,0]
paddd %xmm1, %xmm0
# printf(.L.str, sum): %esi = sum, %edi = format address, %al = 0 because no
# vector registers carry arguments to this variadic call.
movd %xmm0, %esi
movl $.L.str, %ecx
movl %ecx, %edi
xorl %ecx, %ecx
movb %cl, %dl
movb %dl, %al
callq printf
# The printf return value is spilled here.
movl %eax, 12(%rsp) # 4-byte Spill
# BB#49: # %vector.body76
movq 296(%rsp), %rax # 8-byte Reload
movups (%rax), %xmm0
movups 16(%rax), %xmm1
movups 32(%rax), %xmm2
movups 48(%rax), %xmm3
paddd %xmm0, %xmm1
paddd %xmm1, %xmm2
paddd %xmm2, %xmm3
movups 64(%rax), %xmm0
paddd %xmm3, %xmm0
movups 80(%rax), %xmm1
paddd %xmm0, %xmm1
movups 96(%rax), %xmm0
paddd %xmm1, %xmm0
movups 112(%rax), %xmm1
paddd %xmm0, %xmm1
movups 128(%rax), %xmm0
paddd %xmm1, %xmm0
movups 144(%rax), %xmm1
paddd %xmm0, %xmm1
movups 160(%rax), %xmm0
paddd %xmm1, %xmm0
movups 176(%rax), %xmm1
paddd %xmm0, %xmm1
movups 192(%rax), %xmm0
paddd %xmm1, %xmm0
movups 208(%rax), %xmm1
paddd %xmm0, %xmm1
movups 224(%rax), %xmm0
paddd %xmm1, %xmm0
movups 240(%rax), %xmm1
paddd %xmm0, %xmm1
movups 256(%rax), %xmm0
paddd %xmm1, %xmm0
movups 272(%rax), %xmm1
paddd %xmm0, %xmm1
movups 288(%rax), %xmm0
paddd %xmm1, %xmm0
movups 304(%rax), %xmm1
paddd %xmm0, %xmm1
movups 320(%rax), %xmm0
paddd %xmm1, %xmm0
movups 336(%rax), %xmm1
paddd %xmm0, %xmm1
movups 352(%rax), %xmm0
paddd %xmm1, %xmm0
movups 368(%rax), %xmm1
paddd %xmm0, %xmm1
movups 384(%rax), %xmm0
paddd %xmm1, %xmm0
pshufd $14, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,0]
paddd %xmm1, %xmm0
pshufd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0,0,0]
paddd %xmm1, %xmm0
movd %xmm0, %esi
movl $.L.str1, %ecx
movl %ecx, %edi
xorl %ecx, %ecx
movb %cl, %dl
movb %dl, %al
callq printf
movq 304(%rsp), %rdi # 8-byte Reload
movl %eax, 8(%rsp) # 4-byte Spill
callq free
movl $0, %eax
addq $328, %rsp # imm = 0x148
retq
.Ltmp1:
.size main, .Ltmp1-main
.cfi_endproc
# Format strings passed to the two printf calls in main, followed by the
# producer ident and the GNU-stack note section.
# The "@object" / "@progbits" operands had been rewritten as " at object" /
# " at progbits" by the list archive; they are restored so the listing assembles.
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "Sum1 = %d\n"
.size .L.str, 11
.type .L.str1,@object # @.str1
.L.str1:
.asciz "Sum2 = %d\n"
.size .L.str1, 11
.ident "clang version 3.5.0 "
.section ".note.GNU-stack","",@progbits
-------------- next part --------------
# Constant pool for main: one 16-byte-aligned vector of two quadwords {2, 3}.
# It is loaded with movaps (which needs the 16-byte alignment) by the vector
# initialisation blocks of main.
.text
.file "out.ll"
# The section type is spelled "@progbits"; the list archive had rewritten the
# '@' as " at ", which does not assemble.
.section .rodata.cst16,"aM",@progbits,16
.align 16
.LCPI0_0:
.quad 2 # 0x2
.quad 3 # 0x3
.text
# main entry (AT&T syntax). On entry %edi and %rsi hold the first two integer
# arguments (argc, argv under the SysV AMD64 convention).
# Reserves a 328-byte frame; every value is spilled to and reloaded from that
# frame between basic blocks.
.globl main
.align 16, 0x90
# The symbol type is spelled "@function"; the list archive had rewritten the
# '@' as " at ", which does not assemble.
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
subq $328, %rsp # imm = 0x148
.Ltmp0:
.cfi_def_cfa_offset 336
# Default element count is 100; it is kept when the first argument is < 2,
# otherwise %cond.false replaces it with the strtol result.
movl $100, %eax
cmpl $2, %edi
movq %rsi, 320(%rsp) # 8-byte Spill
movl %eax, 316(%rsp) # 4-byte Spill
jl .LBB0_2
# BB#1: # %cond.false
movq 320(%rsp), %rax # 8-byte Reload
movq 8(%rax), %rdi
xorl %ecx, %ecx
movl %ecx, %esi
movl $10, %edx
callq strtol
movl %eax, %ecx
movl %ecx, 316(%rsp) # 4-byte Spill
.LBB0_2: # %cond.end
movl 316(%rsp), %eax # 4-byte Reload
movslq %eax, %rcx
shlq $2, %rcx
movq %rcx, %rdi
movl %eax, 312(%rsp) # 4-byte Spill
callq malloc
movq %rax, %rcx
movl 312(%rsp), %edx # 4-byte Reload
cmpl $0, %edx
movq %rax, 304(%rsp) # 8-byte Spill
movq %rcx, 296(%rsp) # 8-byte Spill
je .LBB0_47
# BB#3: # %polly.cond3.i
movl 312(%rsp), %eax # 4-byte Reload
movl %eax, %ecx
movl %ecx, %edx
addq $-2, %rdx
cmpl $1, %eax
movq %rdx, 288(%rsp) # 8-byte Spill
jl .LBB0_25
# BB#4: # %polly.loop_header.i.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
cmpq $-1, %rdi
movq %rdi, 280(%rsp) # 8-byte Spill
movq %rax, 272(%rsp) # 8-byte Spill
jne .LBB0_13
.LBB0_5: # %polly.loop_header.i.preheader100
movq 272(%rsp), %rax # 8-byte Reload
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq %rsi, %rax
cmovgq %rax, %rsi
addq $1, %rsi
subq %rax, %rsi
movq %rsi, %rdi
andq $3, %rdi
cmpq $0, %rdi
setne %r8b
cmpq $0, %rsi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rax, %r10
movq %rax, 264(%rsp) # 8-byte Spill
movq %rsi, 256(%rsp) # 8-byte Spill
movq %rdi, 248(%rsp) # 8-byte Spill
movq %r10, 240(%rsp) # 8-byte Spill
jne .LBB0_6
jmp .LBB0_11
.LBB0_6: # %unr.cmp140
movq 248(%rsp), %rax # 8-byte Reload
cmpq $1, %rax
movq 264(%rsp), %rcx # 8-byte Reload
movq %rcx, 232(%rsp) # 8-byte Spill
je .LBB0_10
# BB#7: # %unr.cmp132
movq 248(%rsp), %rax # 8-byte Reload
cmpq $2, %rax
movq 264(%rsp), %rcx # 8-byte Reload
movq %rcx, 224(%rsp) # 8-byte Spill
je .LBB0_9
# BB#8: # %polly.loop_header.i.unr
movq 264(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 224(%rsp) # 8-byte Spill
.LBB0_9: # %polly.loop_header.i.unr127
movq 224(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 232(%rsp) # 8-byte Spill
.LBB0_10: # %polly.loop_header.i.unr134
movq 232(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 240(%rsp) # 8-byte Spill
.LBB0_11: # %polly.loop_header.i.preheader100.split
movq 240(%rsp), %rax # 8-byte Reload
movq 256(%rsp), %rcx # 8-byte Reload
cmpq $4, %rcx
movq %rax, 216(%rsp) # 8-byte Spill
jb .LBB0_23
# BB#12: # %polly.loop_header.i.preheader100.split.split
movq 216(%rsp), %rax # 8-byte Reload
movq %rax, 208(%rsp) # 8-byte Spill
jmp .LBB0_43
.LBB0_13: # %overflow.checked
movabsq $0, %rax
movq 280(%rsp), %rcx # 8-byte Reload
addq $1, %rcx
movq 280(%rsp), %rdx # 8-byte Reload
addq $1, %rdx
andq $-4, %rcx
cmpq $0, %rcx
movq %rcx, 200(%rsp) # 8-byte Spill
movq %rdx, 192(%rsp) # 8-byte Spill
movq %rax, 184(%rsp) # 8-byte Spill
je .LBB0_21
# BB#14: # %vector.body.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
addq $1, %rdi
shrq $2, %rdi
shlq $2, %rdi
addq $-4, %rdi
shrq $2, %rdi
addq $1, %rdi
movq %rdi, %rsi
andq $1, %rsi
cmpq $0, %rsi
setne %r8b
cmpq $0, %rdi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rdi, 176(%rsp) # 8-byte Spill
movq %rax, 168(%rsp) # 8-byte Spill
jne .LBB0_15
jmp .LBB0_16
# Writes the first four 32-bit elements of the buffer at 296(%rsp) (the malloc
# result) and sets the next vector index to 4.
# This block differs from .LBB0_15 in the other listing attached to this
# message: here the four values stored are 0, 0, 2, 0, whereas the other
# listing stores 0, 1, 2, 3. The scalar loops in this same function store
# buf[i] = i, so the values written here are inconsistent with them.
# NOTE(review): presumably this is the miscompiled output the report is
# about; confirm against the message text.
.LBB0_15: # %vector.body.unr
movabsq $4, %rax
movl $2, %ecx
movl %ecx, %edx
# xmm0 = {2, 0} as two quadwords (dwords 2, 0, 0, 0).
movd %rdx, %xmm0
# xmm1 = all zeros.
xorps %xmm1, %xmm1
movq 296(%rsp), %rdx # 8-byte Reload
# buf[0] = 0, buf[1] = 0
movq %xmm1, (%rdx)
# Selects dwords 0 and 2 of xmm0, which are 2 and 0: buf[2] = 2, buf[3] = 0
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, 8(%rdx)
addq $0, %rax
# Next index (4) is handed to %vector.body.preheader.split through 168(%rsp).
movq %rax, 168(%rsp) # 8-byte Spill
.LBB0_16: # %vector.body.preheader.split
movq 168(%rsp), %rax # 8-byte Reload
movq 176(%rsp), %rcx # 8-byte Reload
cmpq $2, %rcx
movq %rax, 160(%rsp) # 8-byte Spill
jb .LBB0_20
# BB#17: # %vector.body.preheader.split.split
movq 160(%rsp), %rax # 8-byte Reload
movq %rax, 152(%rsp) # 8-byte Spill
jmp .LBB0_18
.LBB0_18: # %vector.body
# =>This Inner Loop Header: Depth=1
movq 152(%rsp), %rax # 8-byte Reload
movd %rax, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm1
pslldq $8, %xmm1
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
movaps .LCPI0_0(%rip), %xmm3
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm2 # xmm2 = xmm2[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm2, (%rdx,%rax,4)
movq %rax, %rsi
orq $2, %rsi
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rsi,4)
movq %rax, %rsi
addq $4, %rsi
movd %rsi, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm1 # xmm1 = xmm2[0,2,0,0]
movq %xmm1, 16(%rdx,%rax,4)
movq %rsi, %rax
orq $2, %rax
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rax,4)
addq $4, %rsi
movq 200(%rsp), %rax # 8-byte Reload
cmpq %rax, %rsi
movq %rsi, 152(%rsp) # 8-byte Spill
jne .LBB0_18
# BB#19: # %middle.block.loopexit.unr-lcssa
jmp .LBB0_20
.LBB0_20: # %middle.block.loopexit
movq 200(%rsp), %rax # 8-byte Reload
movq %rax, 184(%rsp) # 8-byte Spill
jmp .LBB0_21
.LBB0_21: # %middle.block
movq 184(%rsp), %rax # 8-byte Reload
movq 192(%rsp), %rcx # 8-byte Reload
cmpq %rax, %rcx
movq %rax, 272(%rsp) # 8-byte Spill
je .LBB0_24
jmp .LBB0_5
.LBB0_22: # %polly.cond6.i.loopexit.unr-lcssa
jmp .LBB0_23
.LBB0_23: # %polly.cond6.i.loopexit
jmp .LBB0_24
.LBB0_24: # %polly.cond6.i
movl 312(%rsp), %eax # 4-byte Reload
cmpl $-1, %eax
jg .LBB0_47
.LBB0_25: # %polly.loop_header10.preheader.i
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
cmpq $-1, %rdi
movq %rdi, 144(%rsp) # 8-byte Spill
movq %rax, 136(%rsp) # 8-byte Spill
jne .LBB0_34
.LBB0_26: # %polly.loop_header10.i.preheader
movq 136(%rsp), %rax # 8-byte Reload
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq %rsi, %rax
cmovgq %rax, %rsi
addq $1, %rsi
subq %rax, %rsi
movq %rsi, %rdi
andq $3, %rdi
cmpq $0, %rdi
setne %r8b
cmpq $0, %rsi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rax, %r10
movq %rax, 128(%rsp) # 8-byte Spill
movq %rsi, 120(%rsp) # 8-byte Spill
movq %rdi, 112(%rsp) # 8-byte Spill
movq %r10, 104(%rsp) # 8-byte Spill
jne .LBB0_27
jmp .LBB0_32
.LBB0_27: # %unr.cmp114
movq 112(%rsp), %rax # 8-byte Reload
cmpq $1, %rax
movq 128(%rsp), %rcx # 8-byte Reload
movq %rcx, 96(%rsp) # 8-byte Spill
je .LBB0_31
# BB#28: # %unr.cmp
movq 112(%rsp), %rax # 8-byte Reload
cmpq $2, %rax
movq 128(%rsp), %rcx # 8-byte Reload
movq %rcx, 88(%rsp) # 8-byte Spill
je .LBB0_30
# BB#29: # %polly.loop_header10.i.unr
movq 128(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 88(%rsp) # 8-byte Spill
.LBB0_30: # %polly.loop_header10.i.unr103
movq 88(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 96(%rsp) # 8-byte Spill
.LBB0_31: # %polly.loop_header10.i.unr108
movq 96(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
addq $1, %rax
movq %rax, 104(%rsp) # 8-byte Spill
.LBB0_32: # %polly.loop_header10.i.preheader.split
movq 104(%rsp), %rax # 8-byte Reload
movq 120(%rsp), %rcx # 8-byte Reload
cmpq $4, %rcx
movq %rax, 80(%rsp) # 8-byte Spill
jb .LBB0_46
# BB#33: # %polly.loop_header10.i.preheader.split.split
movq 80(%rsp), %rax # 8-byte Reload
movq %rax, 72(%rsp) # 8-byte Spill
jmp .LBB0_44
.LBB0_34: # %overflow.checked37
movabsq $0, %rax
movq 144(%rsp), %rcx # 8-byte Reload
addq $1, %rcx
movq 144(%rsp), %rdx # 8-byte Reload
addq $1, %rdx
andq $-4, %rcx
cmpq $0, %rcx
movq %rcx, 64(%rsp) # 8-byte Spill
movq %rdx, 56(%rsp) # 8-byte Spill
movq %rax, 48(%rsp) # 8-byte Spill
je .LBB0_42
# BB#35: # %vector.body28.preheader
movabsq $0, %rax
movl 312(%rsp), %ecx # 4-byte Reload
movl %ecx, %edx
movl %edx, %esi
addq $-1, %rsi
cmpq $0, %rsi
movq %rax, %rdi
cmovgq %rsi, %rdi
addq $1, %rdi
shrq $2, %rdi
shlq $2, %rdi
addq $-4, %rdi
shrq $2, %rdi
addq $1, %rdi
movq %rdi, %rsi
andq $1, %rsi
cmpq $0, %rsi
setne %r8b
cmpq $0, %rdi
sete %r9b
orb %r8b, %r9b
testb $1, %r9b
movq %rdi, 40(%rsp) # 8-byte Spill
movq %rax, 32(%rsp) # 8-byte Spill
jne .LBB0_36
jmp .LBB0_37
.LBB0_36: # %vector.body28.unr
movabsq $4, %rax
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm0
pslldq $8, %xmm0
movaps .LCPI0_0(%rip), %xmm1
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm0, (%rdx)
pshufd $8, %xmm1, %xmm0 # xmm0 = xmm1[0,2,0,0]
movq %xmm0, 8(%rdx)
addq $0, %rax
movq %rax, 32(%rsp) # 8-byte Spill
.LBB0_37: # %vector.body28.preheader.split
movq 32(%rsp), %rax # 8-byte Reload
movq 40(%rsp), %rcx # 8-byte Reload
cmpq $2, %rcx
movq %rax, 24(%rsp) # 8-byte Spill
jb .LBB0_41
# BB#38: # %vector.body28.preheader.split.split
movq 24(%rsp), %rax # 8-byte Reload
movq %rax, 16(%rsp) # 8-byte Spill
jmp .LBB0_39
.LBB0_39: # %vector.body28
# =>This Inner Loop Header: Depth=1
movq 16(%rsp), %rax # 8-byte Reload
movd %rax, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movl $1, %ecx
movl %ecx, %edx
movd %rdx, %xmm1
pslldq $8, %xmm1
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
movaps .LCPI0_0(%rip), %xmm3
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm2 # xmm2 = xmm2[0,2,0,0]
movq 296(%rsp), %rdx # 8-byte Reload
movq %xmm2, (%rdx,%rax,4)
movq %rax, %rsi
orq $2, %rsi
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rsi,4)
movq %rax, %rsi
addq $4, %rsi
movd %rsi, %xmm0
movlhps %xmm0, %xmm0 # xmm0 = xmm0[0,0]
movaps %xmm0, %xmm2
paddq %xmm1, %xmm2
paddq %xmm3, %xmm0
pshufd $8, %xmm2, %xmm1 # xmm1 = xmm2[0,2,0,0]
movq %xmm1, 16(%rdx,%rax,4)
movq %rsi, %rax
orq $2, %rax
pshufd $8, %xmm0, %xmm0 # xmm0 = xmm0[0,2,0,0]
movq %xmm0, (%rdx,%rax,4)
addq $4, %rsi
movq 64(%rsp), %rax # 8-byte Reload
cmpq %rax, %rsi
movq %rsi, 16(%rsp) # 8-byte Spill
jne .LBB0_39
# BB#40: # %middle.block29.loopexit.unr-lcssa
jmp .LBB0_41
.LBB0_41: # %middle.block29.loopexit
movq 64(%rsp), %rax # 8-byte Reload
movq %rax, 48(%rsp) # 8-byte Spill
jmp .LBB0_42
.LBB0_42: # %middle.block29
movq 48(%rsp), %rax # 8-byte Reload
movq 56(%rsp), %rcx # 8-byte Reload
cmpq %rax, %rcx
movq %rax, 136(%rsp) # 8-byte Spill
je .LBB0_47
jmp .LBB0_26
.LBB0_43: # %polly.loop_header.i
# =>This Inner Loop Header: Depth=1
movq 208(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
movq %rax, %rsi
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 4(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 8(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 12(%rdx,%rax,4)
movq %rsi, %rax
addq $1, %rax
movq 288(%rsp), %rdi # 8-byte Reload
cmpq %rdi, %rsi
movq %rax, 208(%rsp) # 8-byte Spill
jg .LBB0_22
jmp .LBB0_43
.LBB0_44: # %polly.loop_header10.i
# =>This Inner Loop Header: Depth=1
movq 72(%rsp), %rax # 8-byte Reload
movl %eax, %ecx
movq 296(%rsp), %rdx # 8-byte Reload
movl %ecx, (%rdx,%rax,4)
movq %rax, %rsi
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 4(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 8(%rdx,%rax,4)
addq $1, %rsi
movl %esi, %ecx
movl %ecx, 12(%rdx,%rax,4)
movq %rsi, %rax
addq $1, %rax
movq 288(%rsp), %rdi # 8-byte Reload
cmpq %rdi, %rsi
movq %rax, 72(%rsp) # 8-byte Spill
jle .LBB0_44
# BB#45: # %vector.body57.preheader.loopexit.unr-lcssa
jmp .LBB0_46
.LBB0_46: # %vector.body57.preheader.loopexit
jmp .LBB0_47
.LBB0_47: # %vector.body57.preheader
jmp .LBB0_48
.LBB0_48: # %vector.body57
movq 296(%rsp), %rax # 8-byte Reload
movups (%rax), %xmm0
movups 16(%rax), %xmm1
movups 32(%rax), %xmm2
movups 48(%rax), %xmm3
paddd %xmm0, %xmm1
paddd %xmm1, %xmm2
paddd %xmm2, %xmm3
movups 64(%rax), %xmm0
paddd %xmm3, %xmm0
movups 80(%rax), %xmm1
paddd %xmm0, %xmm1
movups 96(%rax), %xmm0
paddd %xmm1, %xmm0
movups 112(%rax), %xmm1
paddd %xmm0, %xmm1
movups 128(%rax), %xmm0
paddd %xmm1, %xmm0
movups 144(%rax), %xmm1
paddd %xmm0, %xmm1
movups 160(%rax), %xmm0
paddd %xmm1, %xmm0
movups 176(%rax), %xmm1
paddd %xmm0, %xmm1
movups 192(%rax), %xmm0
paddd %xmm1, %xmm0
movups 208(%rax), %xmm1
paddd %xmm0, %xmm1
movups 224(%rax), %xmm0
paddd %xmm1, %xmm0
movups 240(%rax), %xmm1
paddd %xmm0, %xmm1
movups 256(%rax), %xmm0
paddd %xmm1, %xmm0
movups 272(%rax), %xmm1
paddd %xmm0, %xmm1
movups 288(%rax), %xmm0
paddd %xmm1, %xmm0
movups 304(%rax), %xmm1
paddd %xmm0, %xmm1
movups 320(%rax), %xmm0
paddd %xmm1, %xmm0
movups 336(%rax), %xmm1
paddd %xmm0, %xmm1
movups 352(%rax), %xmm0
paddd %xmm1, %xmm0
movups 368(%rax), %xmm1
paddd %xmm0, %xmm1
movups 384(%rax), %xmm0
paddd %xmm1, %xmm0
pshufd $14, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,0]
paddd %xmm1, %xmm0
pshufd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0,0,0]
paddd %xmm1, %xmm0
movd %xmm0, %esi
movl $.L.str, %ecx
movl %ecx, %edi
xorl %ecx, %ecx
movb %cl, %dl
movb %dl, %al
callq printf
movl %eax, 12(%rsp) # 4-byte Spill
# BB#49: # %vector.body76
movq 296(%rsp), %rax # 8-byte Reload
movups (%rax), %xmm0
movups 16(%rax), %xmm1
movups 32(%rax), %xmm2
movups 48(%rax), %xmm3
paddd %xmm0, %xmm1
paddd %xmm1, %xmm2
paddd %xmm2, %xmm3
movups 64(%rax), %xmm0
paddd %xmm3, %xmm0
movups 80(%rax), %xmm1
paddd %xmm0, %xmm1
movups 96(%rax), %xmm0
paddd %xmm1, %xmm0
movups 112(%rax), %xmm1
paddd %xmm0, %xmm1
movups 128(%rax), %xmm0
paddd %xmm1, %xmm0
movups 144(%rax), %xmm1
paddd %xmm0, %xmm1
movups 160(%rax), %xmm0
paddd %xmm1, %xmm0
movups 176(%rax), %xmm1
paddd %xmm0, %xmm1
movups 192(%rax), %xmm0
paddd %xmm1, %xmm0
movups 208(%rax), %xmm1
paddd %xmm0, %xmm1
movups 224(%rax), %xmm0
paddd %xmm1, %xmm0
movups 240(%rax), %xmm1
paddd %xmm0, %xmm1
movups 256(%rax), %xmm0
paddd %xmm1, %xmm0
movups 272(%rax), %xmm1
paddd %xmm0, %xmm1
movups 288(%rax), %xmm0
paddd %xmm1, %xmm0
movups 304(%rax), %xmm1
paddd %xmm0, %xmm1
movups 320(%rax), %xmm0
paddd %xmm1, %xmm0
movups 336(%rax), %xmm1
paddd %xmm0, %xmm1
movups 352(%rax), %xmm0
paddd %xmm1, %xmm0
movups 368(%rax), %xmm1
paddd %xmm0, %xmm1
movups 384(%rax), %xmm0
paddd %xmm1, %xmm0
pshufd $14, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,0]
paddd %xmm1, %xmm0
pshufd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0,0,0]
paddd %xmm1, %xmm0
movd %xmm0, %esi
movl $.L.str1, %ecx
movl %ecx, %edi
xorl %ecx, %ecx
movb %cl, %dl
movb %dl, %al
callq printf
movq 304(%rsp), %rdi # 8-byte Reload
movl %eax, 8(%rsp) # 4-byte Spill
callq free
movl $0, %eax
addq $328, %rsp # imm = 0x148
retq
.Ltmp1:
.size main, .Ltmp1-main
.cfi_endproc
# Format strings passed to the two printf calls in main, followed by the
# producer ident and the GNU-stack note section.
# The "@object" / "@progbits" operands had been rewritten as " at object" /
# " at progbits" by the list archive; they are restored so the listing assembles.
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "Sum1 = %d\n"
.size .L.str, 11
.type .L.str1,@object # @.str1
.L.str1:
.asciz "Sum2 = %d\n"
.size .L.str1, 11
.ident "clang version 3.5.0 "
.section ".note.GNU-stack","",@progbits
More information about the llvm-commits
mailing list