[LLVMdev] Trying to optimize out store/load pair

Sun Jul 17 09:51:55 PDT 2011

Hello all,

My app uses the LLVM JIT as a runtime engine for image analysis (similar to OpenCL). I'm placing values into a struct to pass by reference to a series of functions. After running the createStandardModulePasses(...) set of optimizations, the function calls are all inlined, but the struct store/load pairs haven't been optimized away. I've attached the code below. What I'd like to see is the store/load pairs promoted to registers, and the stores with no associated loads removed, so that further optimizations can be performed.

Thanks for your help,
mike
--
Mike Woodworth
mike at divergentmedia.com

The code (after optimizations):

; ModuleID = '/Users/mike/Desktop/Development/ScopeBox3/build/Beta Release/ScopeBox.app/Contents/Resources/Main2vuy.i386kernel'
; Target description: little-endian ("e") with 32-bit pointers ("p:32:32:32"),
; matching the i386 Darwin triple below.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.0.0"

; Kernel context record: nine i32 fields. @Main addresses them by index as
; 0 = x, 1 = y, 2 = yVal, 3 = crVal, 4 = cbVal, 5 = rVal, 6 = gVal, 7 = bVal,
; 8 = threadID (names taken from the getelementptr results in @Main).
%struct.AKEKernelContext = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }

; @Main(inputBuffer, threadSliceNum) -- kernel entry point, shown after inlining.
;
; Iterates a 1080 (outer) x 960 (inner) space. Each inner iteration:
;   1. reads three consecutive bytes at inputBuffer + outer*3840 + inner*4 (+0, +1, +2);
;   2. derives three i32 values from them with fixed-point arithmetic (ashr 8);
;   3. stores all nine fields of a stack-allocated %struct.AKEKernelContext;
;   4. reloads field 7 (bVal) and uses it as the index into three i32 tables at
;      fixed addresses, incrementing one entry in each table.
;
; The point of the listing: the stores in step 3 and the reload in step 4 were
; NOT eliminated by the optimizer, although the struct never leaves the function.
;
; NOTE(review): the i64 induction variables and the i64 GEP index on an i386
; target (32-bit pointers per the datalayout) suggest the passes may have been
; run without target data information -- confirm that a TargetData pass was
; added to the pass manager before the standard module passes.
define void @Main(i8* nocapture %inputBuffer, i32 %threadSliceNum) nounwind {
entry:
 ; Stack slot for the context struct plus one pointer per field (indices 0..8).
 %.compoundliteral = alloca %struct.AKEKernelContext, align 4
 %x139 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 0
 %y141 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 1
 %yVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 2
 %crVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 3
 %cbVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 4
 %rVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 5
 %gVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 6
 %bVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 7
 %threadID = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 8
 ; Second pointer to field 7 (same slot as %bVal), formed with an i64 index
 ; and without 'inbounds'; this is the pointer the reload below goes through.
 %.idx.i = getelementptr %struct.AKEKernelContext* %.compoundliteral, i64 0, i32 7
 br label %for.cond21.preheader

for.cond21.preheader:                             ; preds = %for.inc152, %entry
 ; Outer loop: %indvar10 counts 0..1079; %tmp20 is the byte offset of the row.
 %indvar10 = phi i64 [ %indvar.next11, %for.inc152 ], [ 0, %entry ]
 %tmp20 = mul i64 %indvar10, 3840
 ; 3840 is even and a multiple of 4, so 'or 1' / 'or 2' equal 'add 1' / 'add 2'.
 %tmp2227 = or i64 %tmp20, 1
 %tmp2428 = or i64 %tmp20, 2
 %tmp26 = trunc i64 %indvar10 to i32
 br label %for.body25

for.body25:                                       ; preds = %for.body25, %for.cond21.preheader
 ; Inner loop: %indvar4 counts 0..959; each step advances 4 bytes in the row.
 %indvar4 = phi i64 [ 0, %for.cond21.preheader ], [ %indvar.next5, %for.body25 ]
 %tmp19 = shl i64 %indvar4, 2
 %tmp21 = add i64 %tmp20, %tmp19
 %arrayidx = getelementptr i8* %inputBuffer, i64 %tmp21
 %tmp23 = add i64 %tmp2227, %tmp19
 %arrayidx42 = getelementptr i8* %inputBuffer, i64 %tmp23
 %tmp25 = add i64 %tmp2428, %tmp19
 %arrayidx49 = getelementptr i8* %inputBuffer, i64 %tmp25
 ; %tmp = 2 * inner index, truncated to i32; stored to field 0 (x) below.
 %tmp7 = shl i64 %indvar4, 1
 %tmp = trunc i64 %tmp7 to i32
 ; Load bytes +0, +1, +2 and zero-extend each to i32.
 %tmp37 = load i8* %arrayidx, align 1, !tbaa !0
 %conv = zext i8 %tmp37 to i32
 %tmp43 = load i8* %arrayidx42, align 1, !tbaa !0
 %conv44 = zext i8 %tmp43 to i32
 %tmp50 = load i8* %arrayidx49, align 1, !tbaa !0
 %conv51 = zext i8 %tmp50 to i32
 ; Fixed-point arithmetic producing the values stored to rVal, gVal and bVal.
 %sub.i183 = add nsw i32 %conv, -128
 %sub6.i184 = add nsw i32 %conv51, -128
 %tmp.i185 = mul i32 %conv44, 298
 %mul17.i186 = mul nsw i32 %sub6.i184, 459
 %add18.i187 = add i32 %tmp.i185, -4640
 %add19.i188 = add i32 %mul17.i186, %add18.i187
 %shr.i189 = ashr i32 %add19.i188, 8
 %mul26.i190 = mul nsw i32 %sub.i183, -55
 %mul30.i191 = mul nsw i32 %sub6.i184, -136
 %add31.i192 = add i32 %add18.i187, %mul26.i190
 %add32.i193 = add i32 %add31.i192, %mul30.i191
 %shr33.i194 = ashr i32 %add32.i193, 8
 %mul40.i195 = mul nsw i32 %sub.i183, 541
 %add46.i196 = add i32 %add18.i187, %mul40.i195
 %shr47.i197 = ashr i32 %add46.i196, 8
 ; Author's note: these stores should be removed.
 store i32 %tmp, i32* %x139, align 4, !tbaa !2
 store i32 %tmp26, i32* %y141, align 4, !tbaa !2
 store i32 %conv44, i32* %yVal, align 4, !tbaa !2
 store i32 %conv, i32* %crVal, align 4, !tbaa !2
 store i32 %conv51, i32* %cbVal, align 4, !tbaa !2
 store i32 %shr.i189, i32* %rVal, align 4, !tbaa !2
 store i32 %shr33.i194, i32* %gVal, align 4, !tbaa !2
 store i32 %shr47.i197, i32* %bVal, align 4, !tbaa !2
 store i32 %threadSliceNum, i32* %threadID, align 4, !tbaa !2
 ; Author's note: ... and this load replaced by the original reg (%shr47.i197 above).
 %.idx.val.i = load i32* %.idx.i, align 4, !tbaa !2
 ; Increment entry [%.idx.val.i] in each of three i32 tables at fixed addresses.
 %arrayidx.i.i.i = getelementptr inbounds i32* inttoptr (i32 33807872 to i32*), i32 %.idx.val.i
 %tmp6.i.i.i = load i32* %arrayidx.i.i.i, align 4, !tbaa !2
 %inc.i.i.i = add i32 %tmp6.i.i.i, 1
 store i32 %inc.i.i.i, i32* %arrayidx.i.i.i, align 4, !tbaa !2
 %arrayidx.i.i6.i = getelementptr inbounds i32* inttoptr (i32 33812992 to i32*), i32 %.idx.val.i
 %tmp6.i.i7.i = load i32* %arrayidx.i.i6.i, align 4, !tbaa !2
 %inc.i.i8.i = add i32 %tmp6.i.i7.i, 1
 store i32 %inc.i.i8.i, i32* %arrayidx.i.i6.i, align 4, !tbaa !2
 %arrayidx.i.i3.i = getelementptr inbounds i32* inttoptr (i32 33818112 to i32*), i32 %.idx.val.i
 %tmp6.i.i4.i = load i32* %arrayidx.i.i3.i, align 4, !tbaa !2
 %inc.i.i5.i = add i32 %tmp6.i.i4.i, 1
 store i32 %inc.i.i5.i, i32* %arrayidx.i.i3.i, align 4, !tbaa !2
 %indvar.next5 = add i64 %indvar4, 1
 %exitcond6 = icmp eq i64 %indvar.next5, 960
 br i1 %exitcond6, label %for.inc152, label %for.body25

for.inc152:                                       ; preds = %for.body25
 %indvar.next11 = add i64 %indvar10, 1
 %exitcond18 = icmp eq i64 %indvar.next11, 1080
 br i1 %exitcond18, label %for.end154, label %for.cond21.preheader

for.end154:                                       ; preds = %for.inc152
 ret void
}

; TBAA type-descriptor nodes referenced by the !tbaa attachments in @Main.
; Each node is { name, parent }; !1 has a null parent and is the root.
; !0 ("omnipotent char") tags the i8 loads from %inputBuffer.
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
; !2 ("int", child of !0) tags every i32 load and store in @Main.
!2 = metadata !{metadata !"int", metadata !0}