[LLVMdev] Trying to optimize out store/load pair
mike woodworth
mikelists at divergentmedia.com
Sun Jul 17 09:51:55 PDT 2011
Hello all,
My app uses the LLVM JIT as a runtime engine for image analysis (similar to OpenCL). I place values into a struct that is passed by reference to a series of functions. After running the createStandardModulePasses(...) set of optimizations, the function calls are all inlined, but the struct store/load pairs have not been optimized away. I've included the code below. What I'd like to see is each store/load pair promoted to a register, and the stores with no associated load removed, so that further optimizations can be performed.
Thanks for your help,
mike
--
Mike Woodworth
mike at divergentmedia.com
The code (after optimizations):
; ModuleID = '/Users/mike/Desktop/Development/ScopeBox3/build/Beta Release/ScopeBox.app/Contents/Resources/Main2vuy.i386kernel'
; Data layout string: "e" = little-endian, "p:32:32:32" = 32-bit pointers,
; "n8:16:32" = native integer widths of 8, 16 and 32 bits.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
; 32-bit x86 on Darwin.
target triple = "i386-apple-darwin9.0.0"
; Context record with nine i32 fields. @Main names pointers to fields 0..8
; %x139, %y141, %yVal, %crVal, %cbVal, %rVal, %gVal, %bVal and %threadID.
%struct.AKEKernelContext = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
; @Main(inputBuffer, threadSliceNum)
;
; Runs a two-level loop nest: the outer counter %indvar10 goes 0..1079 and the
; inner counter %indvar4 goes 0..959. Each inner iteration:
;   1. reads three consecutive bytes of %inputBuffer starting at byte offset
;      %indvar10 * 3840 + %indvar4 * 4,
;   2. derives three i32 values from them with 8-bit fixed-point arithmetic,
;   3. stores nine i32 values into a stack-allocated %struct.AKEKernelContext,
;   4. reloads field 7 of that struct and uses it as the index for three
;      increment operations on i32 arrays at fixed absolute addresses.
;
; The lines carrying a trailing `///` note are the poster's own annotations
; marking the store/load pair in question; `///` is not LLVM comment syntax.
;
; NOTE(review): the loop counters and the leading index of %.idx.i are i64
; although the datalayout declares 32-bit pointers. Presumably the passes were
; run without target data, which would explain why the alloca was not
; promoted -- confirm against the pass-manager setup.
define void @Main(i8* nocapture %inputBuffer, i32 %threadSliceNum) nounwind {
entry:
; Stack slot for the context struct, plus one pointer per field (0..8).
%.compoundliteral = alloca %struct.AKEKernelContext, align 4
%x139 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 0
%y141 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 1
%yVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 2
%crVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 3
%cbVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 4
%rVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 5
%gVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 6
%bVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 7
%threadID = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 8
; Second pointer to field 7: same field as %bVal, but formed with an i64
; leading index and without `inbounds`.
%.idx.i = getelementptr %struct.AKEKernelContext* %.compoundliteral, i64 0, i32 7
br label %for.cond21.preheader
for.cond21.preheader: ; preds = %for.inc152, %entry
; Outer loop header. %tmp20 = %indvar10 * 3840 is the byte offset of the
; current outer iteration; 3840 is a multiple of 4, so the two `or`s below
; are equivalent to adding 1 and 2.
%indvar10 = phi i64 [ %indvar.next11, %for.inc152 ], [ 0, %entry ]
%tmp20 = mul i64 %indvar10, 3840
%tmp2227 = or i64 %tmp20, 1
%tmp2428 = or i64 %tmp20, 2
%tmp26 = trunc i64 %indvar10 to i32
br label %for.body25
for.body25: ; preds = %for.body25, %for.cond21.preheader
; Inner loop body. %tmp19 = %indvar4 * 4 is the byte offset within the
; current outer iteration; the three pointers address bytes +0, +1 and +2.
%indvar4 = phi i64 [ 0, %for.cond21.preheader ], [ %indvar.next5, %for.body25 ]
%tmp19 = shl i64 %indvar4, 2
%tmp21 = add i64 %tmp20, %tmp19
%arrayidx = getelementptr i8* %inputBuffer, i64 %tmp21
%tmp23 = add i64 %tmp2227, %tmp19
%arrayidx42 = getelementptr i8* %inputBuffer, i64 %tmp23
%tmp25 = add i64 %tmp2428, %tmp19
%arrayidx49 = getelementptr i8* %inputBuffer, i64 %tmp25
; %tmp = %indvar4 * 2, truncated to i32; stored to field 0 below.
%tmp7 = shl i64 %indvar4, 1
%tmp = trunc i64 %tmp7 to i32
; Load the three bytes and zero-extend each to i32.
%tmp37 = load i8* %arrayidx, align 1, !tbaa !0
%conv = zext i8 %tmp37 to i32
%tmp43 = load i8* %arrayidx42, align 1, !tbaa !0
%conv44 = zext i8 %tmp43 to i32
%tmp50 = load i8* %arrayidx49, align 1, !tbaa !0
%conv51 = zext i8 %tmp50 to i32
; Fixed-point arithmetic. With b0 = %conv, b1 = %conv44, b2 = %conv51:
;   %shr.i189    = (298*b1 - 4640 + 459*(b2-128)) >> 8                 -> field 5
;   %shr33.i194  = (298*b1 - 4640 -  55*(b0-128) - 136*(b2-128)) >> 8  -> field 6
;   %shr47.i197  = (298*b1 - 4640 + 541*(b0-128)) >> 8                 -> field 7
; The shifts are arithmetic (ashr).
%sub.i183 = add nsw i32 %conv, -128
%sub6.i184 = add nsw i32 %conv51, -128
%tmp.i185 = mul i32 %conv44, 298
%mul17.i186 = mul nsw i32 %sub6.i184, 459
%add18.i187 = add i32 %tmp.i185, -4640
%add19.i188 = add i32 %mul17.i186, %add18.i187
%shr.i189 = ashr i32 %add19.i188, 8
%mul26.i190 = mul nsw i32 %sub.i183, -55
%mul30.i191 = mul nsw i32 %sub6.i184, -136
%add31.i192 = add i32 %add18.i187, %mul26.i190
%add32.i193 = add i32 %add31.i192, %mul30.i191
%shr33.i194 = ashr i32 %add32.i193, 8
%mul40.i195 = mul nsw i32 %sub.i183, 541
%add46.i196 = add i32 %add18.i187, %mul40.i195
%shr47.i197 = ashr i32 %add46.i196, 8
; Populate all nine struct fields. Within this function only field 7 is read
; back (through %.idx.i below).
store i32 %tmp, i32* %x139, align 4, !tbaa !2 /// these stores should be removed
store i32 %tmp26, i32* %y141, align 4, !tbaa !2
store i32 %conv44, i32* %yVal, align 4, !tbaa !2
store i32 %conv, i32* %crVal, align 4, !tbaa !2
store i32 %conv51, i32* %cbVal, align 4, !tbaa !2
store i32 %shr.i189, i32* %rVal, align 4, !tbaa !2
store i32 %shr33.i194, i32* %gVal, align 4, !tbaa !2
store i32 %shr47.i197, i32* %bVal, align 4, !tbaa !2
store i32 %threadSliceNum, i32* %threadID, align 4, !tbaa !2
; Reload field 7, i.e. the value just stored through %bVal.
%.idx.val.i = load i32* %.idx.i, align 4, !tbaa !2 /// and this load replaced by the original reg (%shr47.i197 above)
; Three read-increment-write sequences on i32 arrays based at the absolute
; addresses 33807872, 33812992 and 33818112 (5120 bytes apart). All three
; use the same index, %.idx.val.i.
; NOTE(review): these look like per-channel counter tables -- confirm with
; the kernel source.
%arrayidx.i.i.i = getelementptr inbounds i32* inttoptr (i32 33807872 to i32*), i32 %.idx.val.i
%tmp6.i.i.i = load i32* %arrayidx.i.i.i, align 4, !tbaa !2
%inc.i.i.i = add i32 %tmp6.i.i.i, 1
store i32 %inc.i.i.i, i32* %arrayidx.i.i.i, align 4, !tbaa !2
%arrayidx.i.i6.i = getelementptr inbounds i32* inttoptr (i32 33812992 to i32*), i32 %.idx.val.i
%tmp6.i.i7.i = load i32* %arrayidx.i.i6.i, align 4, !tbaa !2
%inc.i.i8.i = add i32 %tmp6.i.i7.i, 1
store i32 %inc.i.i8.i, i32* %arrayidx.i.i6.i, align 4, !tbaa !2
%arrayidx.i.i3.i = getelementptr inbounds i32* inttoptr (i32 33818112 to i32*), i32 %.idx.val.i
%tmp6.i.i4.i = load i32* %arrayidx.i.i3.i, align 4, !tbaa !2
%inc.i.i5.i = add i32 %tmp6.i.i4.i, 1
store i32 %inc.i.i5.i, i32* %arrayidx.i.i3.i, align 4, !tbaa !2
; Inner loop latch: exit after 960 iterations.
%indvar.next5 = add i64 %indvar4, 1
%exitcond6 = icmp eq i64 %indvar.next5, 960
br i1 %exitcond6, label %for.inc152, label %for.body25
for.inc152: ; preds = %for.body25
; Outer loop latch: exit after 1080 iterations.
%indvar.next11 = add i64 %indvar10, 1
%exitcond18 = icmp eq i64 %indvar.next11, 1080
br i1 %exitcond18, label %for.end154, label %for.cond21.preheader
for.end154: ; preds = %for.inc152
ret void
}
; TBAA type-descriptor nodes referenced by the `!tbaa` attachments above.
; Each node is { name, second operand }: !2 ("int") refers to !0
; ("omnipotent char"), which refers to !1 ("Simple C/C++ TBAA"), whose second
; operand is null. The i8 loads are tagged !0; the i32 loads and stores are
; tagged !2.
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
!2 = metadata !{metadata !"int", metadata !0}
More information about the llvm-dev
mailing list