<div dir="ltr">Sure, I'll take a look.<div>I'll need to dig up SPEC2000 first. :-)</div><div><br></div><div>I'll let you know if I have problems reproducing.</div><div><br></div><div>Thanks for letting me know,<br></div><div> Michael</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Feb 16, 2017 at 3:03 PM, Steven Wu <span dir="ltr"><<a href="mailto:stevenwu@apple.com" target="_blank">stevenwu@apple.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi Michael<br>
<br>
This commit seems to break 177_mesa in SPEC2000 on x86_64 with -Os (at least on macOS). Can you take a look? Let me know if you need my help to reproduce the issue or pin it down.<br>
<br>
Thanks<br>
<span class="HOEnZb"><font color="#888888"><br>
Steven<br>
</font></span><div class="HOEnZb"><div class="h5"><br>
> On Feb 3, 2017, at 11:09 AM, Michael Kuperstein via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br>
><br>
> Author: mkuper<br>
> Date: Fri Feb 3 13:09:45 2017<br>
> New Revision: 294027<br>
><br>
> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=294027&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=294027&view=rev</a><br>
> Log:<br>
> [SLP] Use SCEV to sort memory accesses.<br>
><br>
> This generalizes memory access sorting to use differences between SCEVs,<br>
> instead of relying on constant offsets. That allows us to properly do<br>
> SLP vectorization of non-sequentially ordered loads within loop bodies.<br>
><br>
> Differential Revision: <a href="https://reviews.llvm.org/D29425" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D29425</a><br>
><br>
> Modified:<br>
> llvm/trunk/include/llvm/<wbr>Analysis/LoopAccessAnalysis.h<br>
> llvm/trunk/lib/Analysis/<wbr>LoopAccessAnalysis.cpp<br>
> llvm/trunk/test/Transforms/<wbr>SLPVectorizer/X86/jumbled-<wbr>load.ll<br>
><br>
> Modified: llvm/trunk/include/llvm/<wbr>Analysis/LoopAccessAnalysis.h<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=294027&r1=294026&r2=294027&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/include/<wbr>llvm/Analysis/<wbr>LoopAccessAnalysis.h?rev=<wbr>294027&r1=294026&r2=294027&<wbr>view=diff</a><br>
> ==============================<wbr>==============================<wbr>==================<br>
> --- llvm/trunk/include/llvm/<wbr>Analysis/LoopAccessAnalysis.h (original)<br>
> +++ llvm/trunk/include/llvm/<wbr>Analysis/LoopAccessAnalysis.h Fri Feb 3 13:09:45 2017<br>
> @@ -690,8 +690,14 @@ int64_t getPtrStride(<wbr>PredicatedScalarEvo<br>
> const ValueToValueMap &StridesMap = ValueToValueMap(),<br>
> bool Assume = false, bool ShouldCheckWrap = true);<br>
><br>
> -/// \brief Saves the sorted memory accesses in vector argument 'Sorted' after<br>
> -/// sorting the jumbled memory accesses.<br>
> +/// \brief Try to sort an array of loads / stores.<br>
> +///<br>
> +/// If all pointers refer to the same object, and the differences between all<br>
> +/// pointer operands are known to be constant, the array is sorted by offset,<br>
> +/// and returned in \p Sorted.<br>
> +///<br>
> +/// If those conditions do not hold, the output array is an arbitrary<br>
> +/// permutation of the input.<br>
> void sortMemAccesses(ArrayRef<Value *> VL, const DataLayout &DL,<br>
> ScalarEvolution &SE, SmallVectorImpl<Value *> &Sorted);<br>
><br>
><br>
> Modified: llvm/trunk/lib/Analysis/<wbr>LoopAccessAnalysis.cpp<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=294027&r1=294026&r2=294027&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Analysis/LoopAccessAnalysis.<wbr>cpp?rev=294027&r1=294026&r2=<wbr>294027&view=diff</a><br>
> ==============================<wbr>==============================<wbr>==================<br>
> --- llvm/trunk/lib/Analysis/<wbr>LoopAccessAnalysis.cpp (original)<br>
> +++ llvm/trunk/lib/Analysis/<wbr>LoopAccessAnalysis.cpp Fri Feb 3 13:09:45 2017<br>
> @@ -1058,30 +1058,47 @@ static unsigned getAddressSpaceOperand(V<br>
> return -1;<br>
> }<br>
><br>
> -/// Saves the memory accesses after sorting it into vector argument 'Sorted'.<br>
> void llvm::sortMemAccesses(<wbr>ArrayRef<Value *> VL, const DataLayout &DL,<br>
> ScalarEvolution &SE,<br>
> SmallVectorImpl<Value *> &Sorted) {<br>
> - SmallVector<std::pair<int, Value *>, 4> OffValPairs;<br>
> + SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;<br>
> + OffValPairs.reserve(VL.size())<wbr>;<br>
> + Sorted.reserve(VL.size());<br>
> +<br>
> + // Walk over the pointers, and map each of them to an offset relative to<br>
> + // first pointer in the array.<br>
> + Value *Ptr0 = getPointerOperand(VL[0]);<br>
> + const SCEV *Scev0 = SE.getSCEV(Ptr0);<br>
> + Value *Obj0 = GetUnderlyingObject(Ptr0, DL);<br>
> +<br>
> for (auto *Val : VL) {<br>
> - // Compute the constant offset from the base pointer of each memory accesses<br>
> - // and insert into the vector of key,value pair which needs to be sorted.<br>
> Value *Ptr = getPointerOperand(Val);<br>
> - unsigned AS = getAddressSpaceOperand(Val);<br>
> - unsigned PtrBitWidth = DL.getPointerSizeInBits(AS);<br>
> - Type *Ty = cast<PointerType>(Ptr-><wbr>getType())->getElementType();<br>
> - APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));<br>
> -<br>
> - // FIXME: Currently the offsets are assumed to be constant.However this not<br>
> - // always true as offsets can be variables also and we would need to<br>
> - // consider the difference of the variable offsets.<br>
> - APInt Offset(PtrBitWidth, 0);<br>
> - Ptr-><wbr>stripAndAccumulateInBoundsCons<wbr>tantOffsets(DL, Offset);<br>
> - OffValPairs.push_back(std::<wbr>make_pair(Offset.getSExtValue(<wbr>), Val));<br>
> +<br>
> + // If a pointer refers to a different underlying object, bail - the<br>
> + // pointers are by definition incomparable.<br>
> + Value *CurrObj = GetUnderlyingObject(Ptr, DL);<br>
> + if (CurrObj != Obj0) {<br>
> + Sorted.append(VL.begin(), VL.end());<br>
> + return;<br>
> + }<br>
> +<br>
> + const SCEVConstant *Diff =<br>
> + dyn_cast<SCEVConstant>(SE.<wbr>getMinusSCEV(SE.getSCEV(Ptr), Scev0));<br>
> +<br>
> + // The pointers may not have a constant offset from each other, or SCEV<br>
> + // may just not be smart enough to figure out they do. Regardless,<br>
> + // there's nothing we can do.<br>
> + if (!Diff) {<br>
> + Sorted.append(VL.begin(), VL.end());<br>
> + return;<br>
> + }<br>
> +<br>
> + OffValPairs.emplace_back(Diff-<wbr>>getAPInt().getSExtValue(), Val);<br>
> }<br>
> +<br>
> std::sort(OffValPairs.begin(), OffValPairs.end(),<br>
> - [](const std::pair<int, Value *> &Left,<br>
> - const std::pair<int, Value *> &Right) {<br>
> + [](const std::pair<int64_t, Value *> &Left,<br>
> + const std::pair<int64_t, Value *> &Right) {<br>
> return Left.first < Right.first;<br>
> });<br>
><br>
><br>
> Modified: llvm/trunk/test/Transforms/<wbr>SLPVectorizer/X86/jumbled-<wbr>load.ll<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load.ll?rev=294027&r1=294026&r2=294027&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/SLPVectorizer/X86/<wbr>jumbled-load.ll?rev=294027&r1=<wbr>294026&r2=294027&view=diff</a><br>
> ==============================<wbr>==============================<wbr>==================<br>
> --- llvm/trunk/test/Transforms/<wbr>SLPVectorizer/X86/jumbled-<wbr>load.ll (original)<br>
> +++ llvm/trunk/test/Transforms/<wbr>SLPVectorizer/X86/jumbled-<wbr>load.ll Fri Feb 3 13:09:45 2017<br>
> @@ -1,18 +1,18 @@<br>
> ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py<br>
> ; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-threshold=-10 -slp-vectorizer | FileCheck %s<br>
><br>
> -<br>
> +@total = common global i32 0, align 4<br>
><br>
> define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {<br>
> ; CHECK-LABEL: @jumbled-load(<br>
> -; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* %in, i64 0<br>
> +; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0<br>
> ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3<br>
> ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1<br>
> ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2<br>
> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*<br>
> ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4<br>
> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0><br>
> -; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* %inn, i64 0<br>
> +; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0<br>
> ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2<br>
> ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3<br>
> ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1<br>
> @@ -20,10 +20,10 @@ define i32 @jumbled-load(i32* noalias no<br>
> ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4<br>
> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2><br>
> ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP3]], [[TMP6]]<br>
> -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* %out, i64 0<br>
> -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* %out, i64 1<br>
> -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* %out, i64 2<br>
> -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* %out, i64 3<br>
> +; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0<br>
> +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1<br>
> +; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2<br>
> +; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3<br>
> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*<br>
> ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4<br>
> ; CHECK-NEXT: ret i32 undef<br>
> @@ -59,3 +59,116 @@ define i32 @jumbled-load(i32* noalias no<br>
><br>
> ret i32 undef<br>
> }<br>
> +<br>
> +; Make sure we can sort loads even if they have non-constant offsets, as long as<br>
> +; the offset *differences* are constant and computable by SCEV.<br>
> +define void @scev(i64 %N, i32* nocapture readonly %b, i32* nocapture readonly %c) {<br>
> +; CHECK-LABEL: @scev(<br>
> +; CHECK-NEXT: entry:<br>
> +; CHECK-NEXT: [[CMP_OUTER:%.*]] = icmp sgt i64 [[N:%.*]], 0<br>
> +; CHECK-NEXT: br i1 [[CMP_OUTER]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]<br>
> +; CHECK: for.body.preheader:<br>
> +; CHECK-NEXT: br label [[FOR_BODY:%.*]]<br>
> +; CHECK: for.body:<br>
> +; CHECK-NEXT: [[I_P:%.*]] = phi i64 [ [[ADD21:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]<br>
> +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP14:%.*]], [[FOR_BODY]] ], [ zeroinitializer, [[FOR_BODY_PREHEADER]] ]<br>
> +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[I_P]]<br>
> +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[I_P]]<br>
> +; CHECK-NEXT: [[ADD3:%.*]] = or i64 [[I_P]], 1<br>
> +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[ADD3]]<br>
> +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[ADD3]]<br>
> +; CHECK-NEXT: [[ADD9:%.*]] = or i64 [[I_P]], 2<br>
> +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[ADD9]]<br>
> +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[ADD9]]<br>
> +; CHECK-NEXT: [[ADD15:%.*]] = or i64 [[I_P]], 3<br>
> +; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[ADD15]]<br>
> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*<br>
> +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4<br>
> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3><br>
> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*<br>
> +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4<br>
> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3><br>
> +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[ADD15]]<br>
> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*<br>
> +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4<br>
> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3><br>
> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*<br>
> +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4<br>
> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3><br>
> +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP3]], [[TMP0]]<br>
> +; CHECK-NEXT: [[TMP14]] = add <4 x i32> [[TMP13]], [[TMP12]]<br>
> +; CHECK-NEXT: [[ADD21]] = add nuw nsw i64 [[I_P]], 4<br>
> +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[ADD21]], [[N]]<br>
> +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]<br>
> +; CHECK: for.end.loopexit:<br>
> +; CHECK-NEXT: br label [[FOR_END]]<br>
> +; CHECK: for.end:<br>
> +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP14]], [[FOR_END_LOOPEXIT]] ]<br>
> +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP15]], i32 0<br>
> +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP15]], i32 1<br>
> +; CHECK-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]<br>
> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP15]], i32 2<br>
> +; CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 [[ADD22]], [[TMP18]]<br>
> +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP15]], i32 3<br>
> +; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD23]], [[TMP19]]<br>
> +; CHECK-NEXT: store i32 [[ADD24]], i32* @total, align 4<br>
> +; CHECK-NEXT: ret void<br>
> +;<br>
> +entry:<br>
> + %cmp.outer = icmp sgt i64 %N, 0<br>
> + br i1 %cmp.outer, label %for.body.preheader, label %for.end<br>
> +<br>
> +for.body.preheader: ; preds = %entry<br>
> + br label %for.body<br>
> +<br>
> +for.body: ; preds = %for.body.preheader, %for.body<br>
> + %a4.p = phi i32 [ %add20, %for.body ], [ 0, %for.body.preheader ]<br>
> + %a3.p = phi i32 [ %add2, %for.body ], [ 0, %for.body.preheader ]<br>
> + %a2.p = phi i32 [ %add8, %for.body ], [ 0, %for.body.preheader ]<br>
> + %a1.p = phi i32 [ %add14, %for.body ], [ 0, %for.body.preheader ]<br>
> + %i.p = phi i64 [ %add21, %for.body ], [ 0, %for.body.preheader ]<br>
> + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.p<br>
> + %0 = load i32, i32* %arrayidx, align 4<br>
> + %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.p<br>
> + %1 = load i32, i32* %arrayidx1, align 4<br>
> + %add = add i32 %0, %a3.p<br>
> + %add2 = add i32 %add, %1<br>
> + %add3 = or i64 %i.p, 1<br>
> + %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %add3<br>
> + %2 = load i32, i32* %arrayidx4, align 4<br>
> + %arrayidx6 = getelementptr inbounds i32, i32* %c, i64 %add3<br>
> + %3 = load i32, i32* %arrayidx6, align 4<br>
> + %add7 = add i32 %2, %a2.p<br>
> + %add8 = add i32 %add7, %3<br>
> + %add9 = or i64 %i.p, 2<br>
> + %arrayidx10 = getelementptr inbounds i32, i32* %b, i64 %add9<br>
> + %4 = load i32, i32* %arrayidx10, align 4<br>
> + %arrayidx12 = getelementptr inbounds i32, i32* %c, i64 %add9<br>
> + %5 = load i32, i32* %arrayidx12, align 4<br>
> + %add13 = add i32 %4, %a1.p<br>
> + %add14 = add i32 %add13, %5<br>
> + %add15 = or i64 %i.p, 3<br>
> + %arrayidx16 = getelementptr inbounds i32, i32* %b, i64 %add15<br>
> + %6 = load i32, i32* %arrayidx16, align 4<br>
> + %arrayidx18 = getelementptr inbounds i32, i32* %c, i64 %add15<br>
> + %7 = load i32, i32* %arrayidx18, align 4<br>
> + %add19 = add i32 %6, %a4.p<br>
> + %add20 = add i32 %add19, %7<br>
> + %add21 = add nuw nsw i64 %i.p, 4<br>
> + %cmp = icmp slt i64 %add21, %N<br>
> + br i1 %cmp, label %for.body, label %for.end.loopexit<br>
> +<br>
> +for.end.loopexit: ; preds = %for.body<br>
> + br label %for.end<br>
> +<br>
> +for.end: ; preds = %for.end.loopexit, %entry<br>
> + %a1.0.lcssa = phi i32 [ 0, %entry ], [ %add14, %for.end.loopexit ]<br>
> + %a2.0.lcssa = phi i32 [ 0, %entry ], [ %add8, %for.end.loopexit ]<br>
> + %a3.0.lcssa = phi i32 [ 0, %entry ], [ %add2, %for.end.loopexit ]<br>
> + %a4.0.lcssa = phi i32 [ 0, %entry ], [ %add20, %for.end.loopexit ]<br>
> + %add22 = add nsw i32 %a2.0.lcssa, %a1.0.lcssa<br>
> + %add23 = add nsw i32 %add22, %a3.0.lcssa<br>
> + %add24 = add nsw i32 %add23, %a4.0.lcssa<br>
> + store i32 %add24, i32* @total, align 4<br>
> + ret void<br>
> +}<br>
><br>
><br>
> ______________________________<wbr>_________________<br>
> llvm-commits mailing list<br>
> <a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
> <a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
<br>
</div></div></blockquote></div><br></div>