[llvm] r319142 - [DAGCombine] Disable finding better chains for stores at O0

Simon Dardis via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 27 20:07:59 PST 2017


Author: sdardis
Date: Mon Nov 27 20:07:59 2017
New Revision: 319142

URL: http://llvm.org/viewvc/llvm-project?rev=319142&view=rev
Log:
[DAGCombine] Disable finding better chains for stores at O0

Unoptimized IR can have linear sequences of stores to an array, where the
initial GEP for the first store is formed from the pointer to the array, and the
GEP for each store after the first is formed from the previous GEP with some
offset in an inductive fashion.

The (large) resulting DAG when analyzed by DAGCombine undergoes an excessive
number of combines as each store node is examined every time its' offset node
is combined with any child of the offset. One of the transformations is
findBetterNeighborChains which assists MergeConsecutiveStores. The former
relies on repeated chain walking to do its' work, however MergeConsecutiveStores
is disabled at O0 which makes the transformation redundant.

Any optimization level other than O0 would invoke InstCombine which would
resolve the chain of GEPs into flat base + offset GEP for each store which
does not exhibit the repeated examination of each store to the array.

Disabling this optimization fixes an excessive compile time issue (30~ minutes
for the test case provided) at O0.

Reviewers: niravd, craig.topper, t.p.northover

Differential Revision: https://reviews.llvm.org/D40193

Added:
    llvm/trunk/test/CodeGen/Mips/dagcombine-store-gep-chain-slow.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll
    llvm/trunk/test/CodeGen/X86/pr32284.ll
    llvm/trunk/test/CodeGen/X86/pr32345.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=319142&r1=319141&r2=319142&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Nov 27 20:07:59 2017
@@ -17424,7 +17424,11 @@ void DAGCombiner::GatherAllAliases(SDNod
 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
 /// (aliasing node.)
 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
-  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
+  if (OptLevel == CodeGenOpt::None)
+    return OldChain;
+
+  // Ops for replacing token factor.
+  SmallVector<SDValue, 8> Aliases;
 
   // Accumulate all the aliases to this node.
   GatherAllAliases(N, OldChain, Aliases);
@@ -17454,6 +17458,9 @@ SDValue DAGCombiner::FindBetterChain(SDN
 // to go from a partially-merged state to the desired final
 // fully-merged state.
 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+  if (OptLevel == CodeGenOpt::None)
+    return false;
+
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

Modified: llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll?rev=319142&r1=319141&r2=319142&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll Mon Nov 27 20:07:59 2017
@@ -12,21 +12,22 @@ entry:
 ; CHECK: mov     r[[R2:[0-9]+]], r[[R1]]
 ; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add     r[[R2:[0-9]+]], r[[R1]], #48
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add     r[[R1:[0-9]+]], r[[R1]], #32
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: mov     r[[R1:[0-9]+]], #32
+; CHECK: add     r[[R3:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: add     r[[R3:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
 ; CHECK: mov     r[[R2:[0-9]+]], sp
-; CHECK: mov     r[[R3:[0-9]+]], r[[R2]]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
+; CHECK: add     r[[R3:[0-9]+]], r[[R2]], #48
 ; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: add     r[[R4:[0-9]+]], r[[R2]], #32
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
+; CHECK: mov     r[[R5:[0-9]+]], r[[R2]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
 ; CHECK: add     r[[R1:[0-9]+]], r0, #48
 ; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
 ; CHECK: add     r[[R1:[0-9]+]], r0, #32
@@ -45,24 +46,25 @@ define void @test2(<16 x float>* noalias
 entry:
 ; CHECK-LABEL: test2:
 ; CHECK: ldr     r[[R1:[0-9]+]], [pc, r[[R1]]]
-; CHECK: mov     r[[R2:[0-9]+]], r[[R1]]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
 ; CHECK: add     r[[R2:[0-9]+]], r[[R1]], #48
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add     r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: add     r[[R2:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]!
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: mov     r[[R1:[0-9]+]], #32
-; CHECK: mov     r[[R2:[0-9]+]], sp
-; CHECK: mov     r[[R3:[0-9]+]], r[[R2]]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: mov     r[[R1:[0-9]+]], sp
+; CHECK: orr     r[[R2:[0-9]+]], r[[R1]], #16
 ; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: mov     r[[R3:[0-9]+]], #32
+; CHECK: mov     r[[R9:[0-9]+]], r[[R1]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128], r[[R3]]
+; CHECK: mov     r[[R3:[0-9]+]], r[[R9]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128]
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
 ; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
 ; CHECK: add     r[[R1:[0-9]+]], r0, #48
 ; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
 ; CHECK: add     r[[R1:[0-9]+]], r0, #32

Added: llvm/trunk/test/CodeGen/Mips/dagcombine-store-gep-chain-slow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/dagcombine-store-gep-chain-slow.ll?rev=319142&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/dagcombine-store-gep-chain-slow.ll (added)
+++ llvm/trunk/test/CodeGen/Mips/dagcombine-store-gep-chain-slow.ll Mon Nov 27 20:07:59 2017
@@ -0,0 +1,613 @@
+; RUN: llc < %s -march=mips -mcpu=mips32r6 -o /dev/null
+
+; Test that this file is compiled in a reasonable time period. Without the
+; optimization level check in findBetterNeighbors, this test demonstrates
+; a severe compile time regression (30~ minutes) vs. <10 seconds at 'optnone'.
+
+declare i8 @k(i8*)
+
+define void @d(i32 signext %e4) #1 {
+entry:
+  %e4.addr = alloca i32, align 4
+  %old_val = alloca i8, align 1
+  %new_val = alloca i8, align 1
+  %simd = alloca i8, align 1
+  %code = alloca [269 x i8], align 1
+  store i32 %e4, i32* %e4.addr, align 4
+  %call = call zeroext i8 @k(i8* %simd)
+  store i8 %call, i8* %simd, align 1
+
+  %arrayinit.begin = getelementptr inbounds [269 x i8], [269 x i8]* %code, i32 0, i32 0
+  store i8 32, i8* %arrayinit.begin, align 1
+  %arrayinit.element = getelementptr inbounds i8, i8* %arrayinit.begin, i32 1
+  %a2 = load i8, i8* %old_val, align 1
+  store i8 %a2, i8* %arrayinit.element, align 1
+  %arrayinit.element1 = getelementptr inbounds i8, i8* %arrayinit.element, i32 1
+  store i8 -3, i8* %arrayinit.element1, align 1
+  %arrayinit.element2 = getelementptr inbounds i8, i8* %arrayinit.element1, i32 1
+  store i8 0, i8* %arrayinit.element2, align 1
+  %arrayinit.element3 = getelementptr inbounds i8, i8* %arrayinit.element2, i32 1
+  store i8 33, i8* %arrayinit.element3, align 1
+  %arrayinit.element4 = getelementptr inbounds i8, i8* %arrayinit.element3, i32 1
+  %a3 = load i8, i8* %simd, align 1
+  store i8 %a3, i8* %arrayinit.element4, align 1
+  %arrayinit.element5 = getelementptr inbounds i8, i8* %arrayinit.element4, i32 1
+  store i8 32, i8* %arrayinit.element5, align 1
+  %arrayinit.element6 = getelementptr inbounds i8, i8* %arrayinit.element5, i32 1
+  %a4 = load i8, i8* %simd, align 1
+  store i8 %a4, i8* %arrayinit.element6, align 1
+  %arrayinit.element7 = getelementptr inbounds i8, i8* %arrayinit.element6, i32 1
+  store i8 32, i8* %arrayinit.element7, align 1
+  %arrayinit.element8 = getelementptr inbounds i8, i8* %arrayinit.element7, i32 1
+  %a5 = load i8, i8* %new_val, align 1
+  store i8 %a5, i8* %arrayinit.element8, align 1
+  %arrayinit.element9 = getelementptr inbounds i8, i8* %arrayinit.element8, i32 1
+  store i8 -3, i8* %arrayinit.element9, align 1
+  %arrayinit.element10 = getelementptr inbounds i8, i8* %arrayinit.element9, i32 1
+  store i8 2, i8* %arrayinit.element10, align 1
+  %arrayinit.element11 = getelementptr inbounds i8, i8* %arrayinit.element10, i32 1
+  store i8 0, i8* %arrayinit.element11, align 1
+  %arrayinit.element12 = getelementptr inbounds i8, i8* %arrayinit.element11, i32 1
+  store i8 33, i8* %arrayinit.element12, align 1
+  %arrayinit.element13 = getelementptr inbounds i8, i8* %arrayinit.element12, i32 1
+  %a6 = load i8, i8* %simd, align 1
+  store i8 %a6, i8* %arrayinit.element13, align 1
+  %arrayinit.element14 = getelementptr inbounds i8, i8* %arrayinit.element13, i32 1
+  store i8 32, i8* %arrayinit.element14, align 1
+  %arrayinit.element15 = getelementptr inbounds i8, i8* %arrayinit.element14, i32 1
+  %a7 = load i8, i8* %new_val, align 1
+  store i8 %a7, i8* %arrayinit.element15, align 1
+  %arrayinit.element16 = getelementptr inbounds i8, i8* %arrayinit.element15, i32 1
+  store i8 32, i8* %arrayinit.element16, align 1
+  %arrayinit.element17 = getelementptr inbounds i8, i8* %arrayinit.element16, i32 1
+  %a8 = load i8, i8* %simd, align 1
+  store i8 %a8, i8* %arrayinit.element17, align 1
+  %arrayinit.element18 = getelementptr inbounds i8, i8* %arrayinit.element17, i32 1
+  store i8 -3, i8* %arrayinit.element18, align 1
+  %arrayinit.element19 = getelementptr inbounds i8, i8* %arrayinit.element18, i32 1
+  store i8 1, i8* %arrayinit.element19, align 1
+  %arrayinit.element20 = getelementptr inbounds i8, i8* %arrayinit.element19, i32 1
+  store i8 0, i8* %arrayinit.element20, align 1
+  %arrayinit.element21 = getelementptr inbounds i8, i8* %arrayinit.element20, i32 1
+  store i8 92, i8* %arrayinit.element21, align 1
+  %arrayinit.element22 = getelementptr inbounds i8, i8* %arrayinit.element21, i32 1
+  store i8 4, i8* %arrayinit.element22, align 1
+  %arrayinit.element23 = getelementptr inbounds i8, i8* %arrayinit.element22, i32 1
+  store i8 64, i8* %arrayinit.element23, align 1
+  %arrayinit.element24 = getelementptr inbounds i8, i8* %arrayinit.element23, i32 1
+  store i8 65, i8* %arrayinit.element24, align 1
+  %arrayinit.element25 = getelementptr inbounds i8, i8* %arrayinit.element24, i32 1
+  store i8 0, i8* %arrayinit.element25, align 1
+  %arrayinit.element26 = getelementptr inbounds i8, i8* %arrayinit.element25, i32 1
+  store i8 15, i8* %arrayinit.element26, align 1
+  %arrayinit.element27 = getelementptr inbounds i8, i8* %arrayinit.element26, i32 1
+  store i8 11, i8* %arrayinit.element27, align 1
+  %arrayinit.element28 = getelementptr inbounds i8, i8* %arrayinit.element27, i32 1
+  store i8 32, i8* %arrayinit.element28, align 1
+  %arrayinit.element29 = getelementptr inbounds i8, i8* %arrayinit.element28, i32 1
+  %a9 = load i8, i8* %old_val, align 1
+  store i8 %a9, i8* %arrayinit.element29, align 1
+  %arrayinit.element30 = getelementptr inbounds i8, i8* %arrayinit.element29, i32 1
+  store i8 32, i8* %arrayinit.element30, align 1
+  %arrayinit.element31 = getelementptr inbounds i8, i8* %arrayinit.element30, i32 1
+  %a10 = load i8, i8* %simd, align 1
+  store i8 %a10, i8* %arrayinit.element31, align 1
+  %arrayinit.element32 = getelementptr inbounds i8, i8* %arrayinit.element31, i32 1
+  store i8 -3, i8* %arrayinit.element32, align 1
+  %arrayinit.element33 = getelementptr inbounds i8, i8* %arrayinit.element32, i32 1
+  store i8 1, i8* %arrayinit.element33, align 1
+  %arrayinit.element34 = getelementptr inbounds i8, i8* %arrayinit.element33, i32 1
+  store i8 1, i8* %arrayinit.element34, align 1
+  %arrayinit.element35 = getelementptr inbounds i8, i8* %arrayinit.element34, i32 1
+  store i8 92, i8* %arrayinit.element35, align 1
+  %arrayinit.element36 = getelementptr inbounds i8, i8* %arrayinit.element35, i32 1
+  store i8 4, i8* %arrayinit.element36, align 1
+  %arrayinit.element37 = getelementptr inbounds i8, i8* %arrayinit.element36, i32 1
+  store i8 64, i8* %arrayinit.element37, align 1
+  %arrayinit.element38 = getelementptr inbounds i8, i8* %arrayinit.element37, i32 1
+  store i8 65, i8* %arrayinit.element38, align 1
+  %arrayinit.element39 = getelementptr inbounds i8, i8* %arrayinit.element38, i32 1
+  store i8 0, i8* %arrayinit.element39, align 1
+  %arrayinit.element40 = getelementptr inbounds i8, i8* %arrayinit.element39, i32 1
+  store i8 15, i8* %arrayinit.element40, align 1
+  %arrayinit.element41 = getelementptr inbounds i8, i8* %arrayinit.element40, i32 1
+  store i8 11, i8* %arrayinit.element41, align 1
+  %arrayinit.element42 = getelementptr inbounds i8, i8* %arrayinit.element41, i32 1
+  store i8 32, i8* %arrayinit.element42, align 1
+  %arrayinit.element43 = getelementptr inbounds i8, i8* %arrayinit.element42, i32 1
+  %a11 = load i8, i8* %old_val, align 1
+  store i8 %a11, i8* %arrayinit.element43, align 1
+  %arrayinit.element44 = getelementptr inbounds i8, i8* %arrayinit.element43, i32 1
+  store i8 32, i8* %arrayinit.element44, align 1
+  %arrayinit.element45 = getelementptr inbounds i8, i8* %arrayinit.element44, i32 1
+  %a12 = load i8, i8* %simd, align 1
+  store i8 %a12, i8* %arrayinit.element45, align 1
+  %arrayinit.element46 = getelementptr inbounds i8, i8* %arrayinit.element45, i32 1
+  store i8 -3, i8* %arrayinit.element46, align 1
+  %arrayinit.element47 = getelementptr inbounds i8, i8* %arrayinit.element46, i32 1
+  store i8 1, i8* %arrayinit.element47, align 1
+  %arrayinit.element48 = getelementptr inbounds i8, i8* %arrayinit.element47, i32 1
+  store i8 2, i8* %arrayinit.element48, align 1
+  %arrayinit.element49 = getelementptr inbounds i8, i8* %arrayinit.element48, i32 1
+  store i8 92, i8* %arrayinit.element49, align 1
+  %arrayinit.element50 = getelementptr inbounds i8, i8* %arrayinit.element49, i32 1
+  store i8 4, i8* %arrayinit.element50, align 1
+  %arrayinit.element51 = getelementptr inbounds i8, i8* %arrayinit.element50, i32 1
+  store i8 64, i8* %arrayinit.element51, align 1
+  %arrayinit.element52 = getelementptr inbounds i8, i8* %arrayinit.element51, i32 1
+  store i8 65, i8* %arrayinit.element52, align 1
+  %arrayinit.element53 = getelementptr inbounds i8, i8* %arrayinit.element52, i32 1
+  store i8 0, i8* %arrayinit.element53, align 1
+  %arrayinit.element54 = getelementptr inbounds i8, i8* %arrayinit.element53, i32 1
+  store i8 15, i8* %arrayinit.element54, align 1
+  %arrayinit.element55 = getelementptr inbounds i8, i8* %arrayinit.element54, i32 1
+  store i8 11, i8* %arrayinit.element55, align 1
+  %arrayinit.element56 = getelementptr inbounds i8, i8* %arrayinit.element55, i32 1
+  store i8 32, i8* %arrayinit.element56, align 1
+  %arrayinit.element57 = getelementptr inbounds i8, i8* %arrayinit.element56, i32 1
+  %a13 = load i8, i8* %old_val, align 1
+  store i8 %a13, i8* %arrayinit.element57, align 1
+  %arrayinit.element58 = getelementptr inbounds i8, i8* %arrayinit.element57, i32 1
+  store i8 32, i8* %arrayinit.element58, align 1
+  %arrayinit.element59 = getelementptr inbounds i8, i8* %arrayinit.element58, i32 1
+  %a14 = load i8, i8* %simd, align 1
+  store i8 %a14, i8* %arrayinit.element59, align 1
+  %arrayinit.element60 = getelementptr inbounds i8, i8* %arrayinit.element59, i32 1
+  store i8 -3, i8* %arrayinit.element60, align 1
+  %arrayinit.element61 = getelementptr inbounds i8, i8* %arrayinit.element60, i32 1
+  store i8 1, i8* %arrayinit.element61, align 1
+  %arrayinit.element62 = getelementptr inbounds i8, i8* %arrayinit.element61, i32 1
+  store i8 3, i8* %arrayinit.element62, align 1
+  %arrayinit.element63 = getelementptr inbounds i8, i8* %arrayinit.element62, i32 1
+  store i8 92, i8* %arrayinit.element63, align 1
+  %arrayinit.element64 = getelementptr inbounds i8, i8* %arrayinit.element63, i32 1
+  store i8 4, i8* %arrayinit.element64, align 1
+  %arrayinit.element65 = getelementptr inbounds i8, i8* %arrayinit.element64, i32 1
+  store i8 64, i8* %arrayinit.element65, align 1
+  %arrayinit.element66 = getelementptr inbounds i8, i8* %arrayinit.element65, i32 1
+  store i8 65, i8* %arrayinit.element66, align 1
+  %arrayinit.element67 = getelementptr inbounds i8, i8* %arrayinit.element66, i32 1
+  store i8 0, i8* %arrayinit.element67, align 1
+  %arrayinit.element68 = getelementptr inbounds i8, i8* %arrayinit.element67, i32 1
+  store i8 15, i8* %arrayinit.element68, align 1
+  %arrayinit.element69 = getelementptr inbounds i8, i8* %arrayinit.element68, i32 1
+  store i8 11, i8* %arrayinit.element69, align 1
+  %arrayinit.element70 = getelementptr inbounds i8, i8* %arrayinit.element69, i32 1
+  store i8 32, i8* %arrayinit.element70, align 1
+  %arrayinit.element71 = getelementptr inbounds i8, i8* %arrayinit.element70, i32 1
+  %a15 = load i8, i8* %simd, align 1
+  store i8 %a15, i8* %arrayinit.element71, align 1
+  %arrayinit.element72 = getelementptr inbounds i8, i8* %arrayinit.element71, i32 1
+  store i8 32, i8* %arrayinit.element72, align 1
+  %arrayinit.element73 = getelementptr inbounds i8, i8* %arrayinit.element72, i32 1
+  %a16 = load i8, i8* %new_val, align 1
+  store i8 %a16, i8* %arrayinit.element73, align 1
+  %arrayinit.element74 = getelementptr inbounds i8, i8* %arrayinit.element73, i32 1
+  store i8 -3, i8* %arrayinit.element74, align 1
+  %arrayinit.element75 = getelementptr inbounds i8, i8* %arrayinit.element74, i32 1
+  store i8 2, i8* %arrayinit.element75, align 1
+  %arrayinit.element76 = getelementptr inbounds i8, i8* %arrayinit.element75, i32 1
+  store i8 1, i8* %arrayinit.element76, align 1
+  %arrayinit.element77 = getelementptr inbounds i8, i8* %arrayinit.element76, i32 1
+  store i8 33, i8* %arrayinit.element77, align 1
+  %arrayinit.element78 = getelementptr inbounds i8, i8* %arrayinit.element77, i32 1
+  %a17 = load i8, i8* %simd, align 1
+  store i8 %a17, i8* %arrayinit.element78, align 1
+  %arrayinit.element79 = getelementptr inbounds i8, i8* %arrayinit.element78, i32 1
+  store i8 32, i8* %arrayinit.element79, align 1
+  %arrayinit.element80 = getelementptr inbounds i8, i8* %arrayinit.element79, i32 1
+  %a18 = load i8, i8* %new_val, align 1
+  store i8 %a18, i8* %arrayinit.element80, align 1
+  %arrayinit.element81 = getelementptr inbounds i8, i8* %arrayinit.element80, i32 1
+  store i8 32, i8* %arrayinit.element81, align 1
+  %arrayinit.element82 = getelementptr inbounds i8, i8* %arrayinit.element81, i32 1
+  %a19 = load i8, i8* %simd, align 1
+  store i8 %a19, i8* %arrayinit.element82, align 1
+  %arrayinit.element83 = getelementptr inbounds i8, i8* %arrayinit.element82, i32 1
+  store i8 -3, i8* %arrayinit.element83, align 1
+  %arrayinit.element84 = getelementptr inbounds i8, i8* %arrayinit.element83, i32 1
+  store i8 1, i8* %arrayinit.element84, align 1
+  %arrayinit.element85 = getelementptr inbounds i8, i8* %arrayinit.element84, i32 1
+  store i8 0, i8* %arrayinit.element85, align 1
+  %arrayinit.element86 = getelementptr inbounds i8, i8* %arrayinit.element85, i32 1
+  store i8 92, i8* %arrayinit.element86, align 1
+  %arrayinit.element87 = getelementptr inbounds i8, i8* %arrayinit.element86, i32 1
+  store i8 4, i8* %arrayinit.element87, align 1
+  %arrayinit.element88 = getelementptr inbounds i8, i8* %arrayinit.element87, i32 1
+  store i8 64, i8* %arrayinit.element88, align 1
+  %arrayinit.element89 = getelementptr inbounds i8, i8* %arrayinit.element88, i32 1
+  store i8 65, i8* %arrayinit.element89, align 1
+  %arrayinit.element90 = getelementptr inbounds i8, i8* %arrayinit.element89, i32 1
+  store i8 0, i8* %arrayinit.element90, align 1
+  %arrayinit.element91 = getelementptr inbounds i8, i8* %arrayinit.element90, i32 1
+  store i8 15, i8* %arrayinit.element91, align 1
+  %arrayinit.element92 = getelementptr inbounds i8, i8* %arrayinit.element91, i32 1
+  store i8 11, i8* %arrayinit.element92, align 1
+  %arrayinit.element93 = getelementptr inbounds i8, i8* %arrayinit.element92, i32 1
+  store i8 32, i8* %arrayinit.element93, align 1
+  %arrayinit.element94 = getelementptr inbounds i8, i8* %arrayinit.element93, i32 1
+  %a20 = load i8, i8* %new_val, align 1
+  store i8 %a20, i8* %arrayinit.element94, align 1
+  %arrayinit.element95 = getelementptr inbounds i8, i8* %arrayinit.element94, i32 1
+  store i8 32, i8* %arrayinit.element95, align 1
+  %arrayinit.element96 = getelementptr inbounds i8, i8* %arrayinit.element95, i32 1
+  %a21 = load i8, i8* %simd, align 1
+  store i8 %a21, i8* %arrayinit.element96, align 1
+  %arrayinit.element97 = getelementptr inbounds i8, i8* %arrayinit.element96, i32 1
+  store i8 -3, i8* %arrayinit.element97, align 1
+  %arrayinit.element98 = getelementptr inbounds i8, i8* %arrayinit.element97, i32 1
+  store i8 1, i8* %arrayinit.element98, align 1
+  %arrayinit.element99 = getelementptr inbounds i8, i8* %arrayinit.element98, i32 1
+  store i8 1, i8* %arrayinit.element99, align 1
+  %arrayinit.element100 = getelementptr inbounds i8, i8* %arrayinit.element99, i32 1
+  store i8 92, i8* %arrayinit.element100, align 1
+  %arrayinit.element101 = getelementptr inbounds i8, i8* %arrayinit.element100, i32 1
+  store i8 4, i8* %arrayinit.element101, align 1
+  %arrayinit.element102 = getelementptr inbounds i8, i8* %arrayinit.element101, i32 1
+  store i8 64, i8* %arrayinit.element102, align 1
+  %arrayinit.element103 = getelementptr inbounds i8, i8* %arrayinit.element102, i32 1
+  store i8 65, i8* %arrayinit.element103, align 1
+  %arrayinit.element104 = getelementptr inbounds i8, i8* %arrayinit.element103, i32 1
+  store i8 0, i8* %arrayinit.element104, align 1
+  %arrayinit.element105 = getelementptr inbounds i8, i8* %arrayinit.element104, i32 1
+  store i8 15, i8* %arrayinit.element105, align 1
+  %arrayinit.element106 = getelementptr inbounds i8, i8* %arrayinit.element105, i32 1
+  store i8 11, i8* %arrayinit.element106, align 1
+  %arrayinit.element107 = getelementptr inbounds i8, i8* %arrayinit.element106, i32 1
+  store i8 32, i8* %arrayinit.element107, align 1
+  %arrayinit.element108 = getelementptr inbounds i8, i8* %arrayinit.element107, i32 1
+  %a22 = load i8, i8* %old_val, align 1
+  store i8 %a22, i8* %arrayinit.element108, align 1
+  %arrayinit.element109 = getelementptr inbounds i8, i8* %arrayinit.element108, i32 1
+  store i8 32, i8* %arrayinit.element109, align 1
+  %arrayinit.element110 = getelementptr inbounds i8, i8* %arrayinit.element109, i32 1
+  %a23 = load i8, i8* %simd, align 1
+  store i8 %a23, i8* %arrayinit.element110, align 1
+  %arrayinit.element111 = getelementptr inbounds i8, i8* %arrayinit.element110, i32 1
+  store i8 -3, i8* %arrayinit.element111, align 1
+  %arrayinit.element112 = getelementptr inbounds i8, i8* %arrayinit.element111, i32 1
+  store i8 1, i8* %arrayinit.element112, align 1
+  %arrayinit.element113 = getelementptr inbounds i8, i8* %arrayinit.element112, i32 1
+  store i8 2, i8* %arrayinit.element113, align 1
+  %arrayinit.element114 = getelementptr inbounds i8, i8* %arrayinit.element113, i32 1
+  store i8 92, i8* %arrayinit.element114, align 1
+  %arrayinit.element115 = getelementptr inbounds i8, i8* %arrayinit.element114, i32 1
+  store i8 4, i8* %arrayinit.element115, align 1
+  %arrayinit.element116 = getelementptr inbounds i8, i8* %arrayinit.element115, i32 1
+  store i8 64, i8* %arrayinit.element116, align 1
+  %arrayinit.element117 = getelementptr inbounds i8, i8* %arrayinit.element116, i32 1
+  store i8 65, i8* %arrayinit.element117, align 1
+  %arrayinit.element118 = getelementptr inbounds i8, i8* %arrayinit.element117, i32 1
+  store i8 0, i8* %arrayinit.element118, align 1
+  %arrayinit.element119 = getelementptr inbounds i8, i8* %arrayinit.element118, i32 1
+  store i8 15, i8* %arrayinit.element119, align 1
+  %arrayinit.element120 = getelementptr inbounds i8, i8* %arrayinit.element119, i32 1
+  store i8 11, i8* %arrayinit.element120, align 1
+  %arrayinit.element121 = getelementptr inbounds i8, i8* %arrayinit.element120, i32 1
+  store i8 32, i8* %arrayinit.element121, align 1
+  %arrayinit.element122 = getelementptr inbounds i8, i8* %arrayinit.element121, i32 1
+  %a24 = load i8, i8* %old_val, align 1
+  store i8 %a24, i8* %arrayinit.element122, align 1
+  %arrayinit.element123 = getelementptr inbounds i8, i8* %arrayinit.element122, i32 1
+  store i8 32, i8* %arrayinit.element123, align 1
+  %arrayinit.element124 = getelementptr inbounds i8, i8* %arrayinit.element123, i32 1
+  %a25 = load i8, i8* %simd, align 1
+  store i8 %a25, i8* %arrayinit.element124, align 1
+  %arrayinit.element125 = getelementptr inbounds i8, i8* %arrayinit.element124, i32 1
+  store i8 -3, i8* %arrayinit.element125, align 1
+  %arrayinit.element126 = getelementptr inbounds i8, i8* %arrayinit.element125, i32 1
+  store i8 1, i8* %arrayinit.element126, align 1
+  %arrayinit.element127 = getelementptr inbounds i8, i8* %arrayinit.element126, i32 1
+  store i8 3, i8* %arrayinit.element127, align 1
+  %arrayinit.element128 = getelementptr inbounds i8, i8* %arrayinit.element127, i32 1
+  store i8 92, i8* %arrayinit.element128, align 1
+  %arrayinit.element129 = getelementptr inbounds i8, i8* %arrayinit.element128, i32 1
+  store i8 4, i8* %arrayinit.element129, align 1
+  %arrayinit.element130 = getelementptr inbounds i8, i8* %arrayinit.element129, i32 1
+  store i8 64, i8* %arrayinit.element130, align 1
+  %arrayinit.element131 = getelementptr inbounds i8, i8* %arrayinit.element130, i32 1
+  store i8 65, i8* %arrayinit.element131, align 1
+  %arrayinit.element132 = getelementptr inbounds i8, i8* %arrayinit.element131, i32 1
+  store i8 0, i8* %arrayinit.element132, align 1
+  %arrayinit.element133 = getelementptr inbounds i8, i8* %arrayinit.element132, i32 1
+  store i8 15, i8* %arrayinit.element133, align 1
+  %arrayinit.element134 = getelementptr inbounds i8, i8* %arrayinit.element133, i32 1
+  store i8 11, i8* %arrayinit.element134, align 1
+  %arrayinit.element135 = getelementptr inbounds i8, i8* %arrayinit.element134, i32 1
+  store i8 32, i8* %arrayinit.element135, align 1
+  %arrayinit.element136 = getelementptr inbounds i8, i8* %arrayinit.element135, i32 1
+  %a26 = load i8, i8* %simd, align 1
+  store i8 %a26, i8* %arrayinit.element136, align 1
+  %arrayinit.element137 = getelementptr inbounds i8, i8* %arrayinit.element136, i32 1
+  store i8 32, i8* %arrayinit.element137, align 1
+  %arrayinit.element138 = getelementptr inbounds i8, i8* %arrayinit.element137, i32 1
+  %a27 = load i8, i8* %new_val, align 1
+  store i8 %a27, i8* %arrayinit.element138, align 1
+  %arrayinit.element139 = getelementptr inbounds i8, i8* %arrayinit.element138, i32 1
+  store i8 -3, i8* %arrayinit.element139, align 1
+  %arrayinit.element140 = getelementptr inbounds i8, i8* %arrayinit.element139, i32 1
+  store i8 2, i8* %arrayinit.element140, align 1
+  %arrayinit.element141 = getelementptr inbounds i8, i8* %arrayinit.element140, i32 1
+  store i8 2, i8* %arrayinit.element141, align 1
+  %arrayinit.element142 = getelementptr inbounds i8, i8* %arrayinit.element141, i32 1
+  store i8 33, i8* %arrayinit.element142, align 1
+  %arrayinit.element143 = getelementptr inbounds i8, i8* %arrayinit.element142, i32 1
+  %a28 = load i8, i8* %simd, align 1
+  store i8 %a28, i8* %arrayinit.element143, align 1
+  %arrayinit.element144 = getelementptr inbounds i8, i8* %arrayinit.element143, i32 1
+  store i8 32, i8* %arrayinit.element144, align 1
+  %arrayinit.element145 = getelementptr inbounds i8, i8* %arrayinit.element144, i32 1
+  %a29 = load i8, i8* %new_val, align 1
+  store i8 %a29, i8* %arrayinit.element145, align 1
+  %arrayinit.element146 = getelementptr inbounds i8, i8* %arrayinit.element145, i32 1
+  store i8 32, i8* %arrayinit.element146, align 1
+  %arrayinit.element147 = getelementptr inbounds i8, i8* %arrayinit.element146, i32 1
+  %a30 = load i8, i8* %simd, align 1
+  store i8 %a30, i8* %arrayinit.element147, align 1
+  %arrayinit.element148 = getelementptr inbounds i8, i8* %arrayinit.element147, i32 1
+  store i8 -3, i8* %arrayinit.element148, align 1
+  %arrayinit.element149 = getelementptr inbounds i8, i8* %arrayinit.element148, i32 1
+  store i8 1, i8* %arrayinit.element149, align 1
+  %arrayinit.element150 = getelementptr inbounds i8, i8* %arrayinit.element149, i32 1
+  store i8 0, i8* %arrayinit.element150, align 1
+  %arrayinit.element151 = getelementptr inbounds i8, i8* %arrayinit.element150, i32 1
+  store i8 92, i8* %arrayinit.element151, align 1
+  %arrayinit.element152 = getelementptr inbounds i8, i8* %arrayinit.element151, i32 1
+  store i8 4, i8* %arrayinit.element152, align 1
+  %arrayinit.element153 = getelementptr inbounds i8, i8* %arrayinit.element152, i32 1
+  store i8 64, i8* %arrayinit.element153, align 1
+  %arrayinit.element154 = getelementptr inbounds i8, i8* %arrayinit.element153, i32 1
+  store i8 65, i8* %arrayinit.element154, align 1
+  %arrayinit.element155 = getelementptr inbounds i8, i8* %arrayinit.element154, i32 1
+  store i8 0, i8* %arrayinit.element155, align 1
+  %arrayinit.element156 = getelementptr inbounds i8, i8* %arrayinit.element155, i32 1
+  store i8 15, i8* %arrayinit.element156, align 1
+  %arrayinit.element157 = getelementptr inbounds i8, i8* %arrayinit.element156, i32 1
+  store i8 11, i8* %arrayinit.element157, align 1
+  %arrayinit.element158 = getelementptr inbounds i8, i8* %arrayinit.element157, i32 1
+  store i8 32, i8* %arrayinit.element158, align 1
+  %arrayinit.element159 = getelementptr inbounds i8, i8* %arrayinit.element158, i32 1
+  %a31 = load i8, i8* %new_val, align 1
+  store i8 %a31, i8* %arrayinit.element159, align 1
+  %arrayinit.element160 = getelementptr inbounds i8, i8* %arrayinit.element159, i32 1
+  store i8 32, i8* %arrayinit.element160, align 1
+  %arrayinit.element161 = getelementptr inbounds i8, i8* %arrayinit.element160, i32 1
+  %a32 = load i8, i8* %simd, align 1
+  store i8 %a32, i8* %arrayinit.element161, align 1
+  %arrayinit.element162 = getelementptr inbounds i8, i8* %arrayinit.element161, i32 1
+  store i8 -3, i8* %arrayinit.element162, align 1
+  %arrayinit.element163 = getelementptr inbounds i8, i8* %arrayinit.element162, i32 1
+  store i8 1, i8* %arrayinit.element163, align 1
+  %arrayinit.element164 = getelementptr inbounds i8, i8* %arrayinit.element163, i32 1
+  store i8 1, i8* %arrayinit.element164, align 1
+  %arrayinit.element165 = getelementptr inbounds i8, i8* %arrayinit.element164, i32 1
+  store i8 92, i8* %arrayinit.element165, align 1
+  %arrayinit.element166 = getelementptr inbounds i8, i8* %arrayinit.element165, i32 1
+  store i8 4, i8* %arrayinit.element166, align 1
+  %arrayinit.element167 = getelementptr inbounds i8, i8* %arrayinit.element166, i32 1
+  store i8 64, i8* %arrayinit.element167, align 1
+  %arrayinit.element168 = getelementptr inbounds i8, i8* %arrayinit.element167, i32 1
+  store i8 65, i8* %arrayinit.element168, align 1
+  %arrayinit.element169 = getelementptr inbounds i8, i8* %arrayinit.element168, i32 1
+  store i8 0, i8* %arrayinit.element169, align 1
+  %arrayinit.element170 = getelementptr inbounds i8, i8* %arrayinit.element169, i32 1
+  store i8 15, i8* %arrayinit.element170, align 1
+  %arrayinit.element171 = getelementptr inbounds i8, i8* %arrayinit.element170, i32 1
+  store i8 11, i8* %arrayinit.element171, align 1
+  %arrayinit.element172 = getelementptr inbounds i8, i8* %arrayinit.element171, i32 1
+  store i8 32, i8* %arrayinit.element172, align 1
+  %arrayinit.element173 = getelementptr inbounds i8, i8* %arrayinit.element172, i32 1
+  %a33 = load i8, i8* %new_val, align 1
+  store i8 %a33, i8* %arrayinit.element173, align 1
+  %arrayinit.element174 = getelementptr inbounds i8, i8* %arrayinit.element173, i32 1
+  store i8 32, i8* %arrayinit.element174, align 1
+  %arrayinit.element175 = getelementptr inbounds i8, i8* %arrayinit.element174, i32 1
+  %a34 = load i8, i8* %simd, align 1
+  store i8 %a34, i8* %arrayinit.element175, align 1
+  %arrayinit.element176 = getelementptr inbounds i8, i8* %arrayinit.element175, i32 1
+  store i8 -3, i8* %arrayinit.element176, align 1
+  %arrayinit.element177 = getelementptr inbounds i8, i8* %arrayinit.element176, i32 1
+  store i8 1, i8* %arrayinit.element177, align 1
+  %arrayinit.element178 = getelementptr inbounds i8, i8* %arrayinit.element177, i32 1
+  store i8 2, i8* %arrayinit.element178, align 1
+  %arrayinit.element179 = getelementptr inbounds i8, i8* %arrayinit.element178, i32 1
+  store i8 92, i8* %arrayinit.element179, align 1
+  %arrayinit.element180 = getelementptr inbounds i8, i8* %arrayinit.element179, i32 1
+  store i8 4, i8* %arrayinit.element180, align 1
+  %arrayinit.element181 = getelementptr inbounds i8, i8* %arrayinit.element180, i32 1
+  store i8 64, i8* %arrayinit.element181, align 1
+  %arrayinit.element182 = getelementptr inbounds i8, i8* %arrayinit.element181, i32 1
+  store i8 65, i8* %arrayinit.element182, align 1
+  %arrayinit.element183 = getelementptr inbounds i8, i8* %arrayinit.element182, i32 1
+  store i8 0, i8* %arrayinit.element183, align 1
+  %arrayinit.element184 = getelementptr inbounds i8, i8* %arrayinit.element183, i32 1
+  store i8 15, i8* %arrayinit.element184, align 1
+  %arrayinit.element185 = getelementptr inbounds i8, i8* %arrayinit.element184, i32 1
+  store i8 11, i8* %arrayinit.element185, align 1
+  %arrayinit.element186 = getelementptr inbounds i8, i8* %arrayinit.element185, i32 1
+  store i8 32, i8* %arrayinit.element186, align 1
+  %arrayinit.element187 = getelementptr inbounds i8, i8* %arrayinit.element186, i32 1
+  %a35 = load i8, i8* %old_val, align 1
+  store i8 %a35, i8* %arrayinit.element187, align 1
+  %arrayinit.element188 = getelementptr inbounds i8, i8* %arrayinit.element187, i32 1
+  store i8 32, i8* %arrayinit.element188, align 1
+  %arrayinit.element189 = getelementptr inbounds i8, i8* %arrayinit.element188, i32 1
+  %a36 = load i8, i8* %simd, align 1
+  store i8 %a36, i8* %arrayinit.element189, align 1
+  %arrayinit.element190 = getelementptr inbounds i8, i8* %arrayinit.element189, i32 1
+  store i8 -3, i8* %arrayinit.element190, align 1
+  %arrayinit.element191 = getelementptr inbounds i8, i8* %arrayinit.element190, i32 1
+  store i8 1, i8* %arrayinit.element191, align 1
+  %arrayinit.element192 = getelementptr inbounds i8, i8* %arrayinit.element191, i32 1
+  store i8 3, i8* %arrayinit.element192, align 1
+  %arrayinit.element193 = getelementptr inbounds i8, i8* %arrayinit.element192, i32 1
+  store i8 92, i8* %arrayinit.element193, align 1
+  %arrayinit.element194 = getelementptr inbounds i8, i8* %arrayinit.element193, i32 1
+  store i8 4, i8* %arrayinit.element194, align 1
+  %arrayinit.element195 = getelementptr inbounds i8, i8* %arrayinit.element194, i32 1
+  store i8 64, i8* %arrayinit.element195, align 1
+  %arrayinit.element196 = getelementptr inbounds i8, i8* %arrayinit.element195, i32 1
+  store i8 65, i8* %arrayinit.element196, align 1
+  %arrayinit.element197 = getelementptr inbounds i8, i8* %arrayinit.element196, i32 1
+  store i8 0, i8* %arrayinit.element197, align 1
+  %arrayinit.element198 = getelementptr inbounds i8, i8* %arrayinit.element197, i32 1
+  store i8 15, i8* %arrayinit.element198, align 1
+  %arrayinit.element199 = getelementptr inbounds i8, i8* %arrayinit.element198, i32 1
+  store i8 11, i8* %arrayinit.element199, align 1
+  %arrayinit.element200 = getelementptr inbounds i8, i8* %arrayinit.element199, i32 1
+  store i8 32, i8* %arrayinit.element200, align 1
+  %arrayinit.element201 = getelementptr inbounds i8, i8* %arrayinit.element200, i32 1
+  %a37 = load i8, i8* %simd, align 1
+  store i8 %a37, i8* %arrayinit.element201, align 1
+  %arrayinit.element202 = getelementptr inbounds i8, i8* %arrayinit.element201, i32 1
+  store i8 32, i8* %arrayinit.element202, align 1
+  %arrayinit.element203 = getelementptr inbounds i8, i8* %arrayinit.element202, i32 1
+  %a38 = load i8, i8* %new_val, align 1
+  store i8 %a38, i8* %arrayinit.element203, align 1
+  %arrayinit.element204 = getelementptr inbounds i8, i8* %arrayinit.element203, i32 1
+  store i8 -3, i8* %arrayinit.element204, align 1
+  %arrayinit.element205 = getelementptr inbounds i8, i8* %arrayinit.element204, i32 1
+  store i8 2, i8* %arrayinit.element205, align 1
+  %arrayinit.element206 = getelementptr inbounds i8, i8* %arrayinit.element205, i32 1
+  store i8 3, i8* %arrayinit.element206, align 1
+  %arrayinit.element207 = getelementptr inbounds i8, i8* %arrayinit.element206, i32 1
+  store i8 33, i8* %arrayinit.element207, align 1
+  %arrayinit.element208 = getelementptr inbounds i8, i8* %arrayinit.element207, i32 1
+  %a39 = load i8, i8* %simd, align 1
+  store i8 %a39, i8* %arrayinit.element208, align 1
+  %arrayinit.element209 = getelementptr inbounds i8, i8* %arrayinit.element208, i32 1
+  store i8 32, i8* %arrayinit.element209, align 1
+  %arrayinit.element210 = getelementptr inbounds i8, i8* %arrayinit.element209, i32 1
+  %a40 = load i8, i8* %new_val, align 1
+  store i8 %a40, i8* %arrayinit.element210, align 1
+  %arrayinit.element211 = getelementptr inbounds i8, i8* %arrayinit.element210, i32 1
+  store i8 32, i8* %arrayinit.element211, align 1
+  %arrayinit.element212 = getelementptr inbounds i8, i8* %arrayinit.element211, i32 1
+  %a41 = load i8, i8* %simd, align 1
+  store i8 %a41, i8* %arrayinit.element212, align 1
+  %arrayinit.element213 = getelementptr inbounds i8, i8* %arrayinit.element212, i32 1
+  store i8 -3, i8* %arrayinit.element213, align 1
+  %arrayinit.element214 = getelementptr inbounds i8, i8* %arrayinit.element213, i32 1
+  store i8 1, i8* %arrayinit.element214, align 1
+  %arrayinit.element215 = getelementptr inbounds i8, i8* %arrayinit.element214, i32 1
+  store i8 0, i8* %arrayinit.element215, align 1
+  %arrayinit.element216 = getelementptr inbounds i8, i8* %arrayinit.element215, i32 1
+  store i8 92, i8* %arrayinit.element216, align 1
+  %arrayinit.element217 = getelementptr inbounds i8, i8* %arrayinit.element216, i32 1
+  store i8 4, i8* %arrayinit.element217, align 1
+  %arrayinit.element218 = getelementptr inbounds i8, i8* %arrayinit.element217, i32 1
+  store i8 64, i8* %arrayinit.element218, align 1
+  %arrayinit.element219 = getelementptr inbounds i8, i8* %arrayinit.element218, i32 1
+  store i8 65, i8* %arrayinit.element219, align 1
+  %arrayinit.element220 = getelementptr inbounds i8, i8* %arrayinit.element219, i32 1
+  store i8 0, i8* %arrayinit.element220, align 1
+  %arrayinit.element221 = getelementptr inbounds i8, i8* %arrayinit.element220, i32 1
+  store i8 15, i8* %arrayinit.element221, align 1
+  %arrayinit.element222 = getelementptr inbounds i8, i8* %arrayinit.element221, i32 1
+  store i8 11, i8* %arrayinit.element222, align 1
+  %arrayinit.element223 = getelementptr inbounds i8, i8* %arrayinit.element222, i32 1
+  store i8 32, i8* %arrayinit.element223, align 1
+  %arrayinit.element224 = getelementptr inbounds i8, i8* %arrayinit.element223, i32 1
+  %a42 = load i8, i8* %new_val, align 1
+  store i8 %a42, i8* %arrayinit.element224, align 1
+  %arrayinit.element225 = getelementptr inbounds i8, i8* %arrayinit.element224, i32 1
+  store i8 32, i8* %arrayinit.element225, align 1
+  %arrayinit.element226 = getelementptr inbounds i8, i8* %arrayinit.element225, i32 1
+  %a43 = load i8, i8* %simd, align 1
+  store i8 %a43, i8* %arrayinit.element226, align 1
+  %arrayinit.element227 = getelementptr inbounds i8, i8* %arrayinit.element226, i32 1
+  store i8 -3, i8* %arrayinit.element227, align 1
+  %arrayinit.element228 = getelementptr inbounds i8, i8* %arrayinit.element227, i32 1
+  store i8 1, i8* %arrayinit.element228, align 1
+  %arrayinit.element229 = getelementptr inbounds i8, i8* %arrayinit.element228, i32 1
+  store i8 1, i8* %arrayinit.element229, align 1
+  %arrayinit.element230 = getelementptr inbounds i8, i8* %arrayinit.element229, i32 1
+  store i8 92, i8* %arrayinit.element230, align 1
+  %arrayinit.element231 = getelementptr inbounds i8, i8* %arrayinit.element230, i32 1
+  store i8 4, i8* %arrayinit.element231, align 1
+  %arrayinit.element232 = getelementptr inbounds i8, i8* %arrayinit.element231, i32 1
+  store i8 64, i8* %arrayinit.element232, align 1
+  %arrayinit.element233 = getelementptr inbounds i8, i8* %arrayinit.element232, i32 1
+  store i8 65, i8* %arrayinit.element233, align 1
+  %arrayinit.element234 = getelementptr inbounds i8, i8* %arrayinit.element233, i32 1
+  store i8 0, i8* %arrayinit.element234, align 1
+  %arrayinit.element235 = getelementptr inbounds i8, i8* %arrayinit.element234, i32 1
+  store i8 15, i8* %arrayinit.element235, align 1
+  %arrayinit.element236 = getelementptr inbounds i8, i8* %arrayinit.element235, i32 1
+  store i8 11, i8* %arrayinit.element236, align 1
+  %arrayinit.element237 = getelementptr inbounds i8, i8* %arrayinit.element236, i32 1
+  store i8 32, i8* %arrayinit.element237, align 1
+  %arrayinit.element238 = getelementptr inbounds i8, i8* %arrayinit.element237, i32 1
+  %a44 = load i8, i8* %new_val, align 1
+  store i8 %a44, i8* %arrayinit.element238, align 1
+  %arrayinit.element239 = getelementptr inbounds i8, i8* %arrayinit.element238, i32 1
+  store i8 32, i8* %arrayinit.element239, align 1
+  %arrayinit.element240 = getelementptr inbounds i8, i8* %arrayinit.element239, i32 1
+  %a45 = load i8, i8* %simd, align 1
+  store i8 %a45, i8* %arrayinit.element240, align 1
+  %arrayinit.element241 = getelementptr inbounds i8, i8* %arrayinit.element240, i32 1
+  store i8 -3, i8* %arrayinit.element241, align 1
+  %arrayinit.element242 = getelementptr inbounds i8, i8* %arrayinit.element241, i32 1
+  store i8 1, i8* %arrayinit.element242, align 1
+  %arrayinit.element243 = getelementptr inbounds i8, i8* %arrayinit.element242, i32 1
+  store i8 2, i8* %arrayinit.element243, align 1
+  %arrayinit.element244 = getelementptr inbounds i8, i8* %arrayinit.element243, i32 1
+  store i8 92, i8* %arrayinit.element244, align 1
+  %arrayinit.element245 = getelementptr inbounds i8, i8* %arrayinit.element244, i32 1
+  store i8 4, i8* %arrayinit.element245, align 1
+  %arrayinit.element246 = getelementptr inbounds i8, i8* %arrayinit.element245, i32 1
+  store i8 64, i8* %arrayinit.element246, align 1
+  %arrayinit.element247 = getelementptr inbounds i8, i8* %arrayinit.element246, i32 1
+  store i8 65, i8* %arrayinit.element247, align 1
+  %arrayinit.element248 = getelementptr inbounds i8, i8* %arrayinit.element247, i32 1
+  store i8 0, i8* %arrayinit.element248, align 1
+  %arrayinit.element249 = getelementptr inbounds i8, i8* %arrayinit.element248, i32 1
+  store i8 15, i8* %arrayinit.element249, align 1
+  %arrayinit.element250 = getelementptr inbounds i8, i8* %arrayinit.element249, i32 1
+  store i8 11, i8* %arrayinit.element250, align 1
+  %arrayinit.element251 = getelementptr inbounds i8, i8* %arrayinit.element250, i32 1
+  store i8 32, i8* %arrayinit.element251, align 1
+  %arrayinit.element252 = getelementptr inbounds i8, i8* %arrayinit.element251, i32 1
+  %a46 = load i8, i8* %new_val, align 1
+  store i8 %a46, i8* %arrayinit.element252, align 1
+  %arrayinit.element253 = getelementptr inbounds i8, i8* %arrayinit.element252, i32 1
+  store i8 32, i8* %arrayinit.element253, align 1
+  %arrayinit.element254 = getelementptr inbounds i8, i8* %arrayinit.element253, i32 1
+  %a47 = load i8, i8* %simd, align 1
+  store i8 %a47, i8* %arrayinit.element254, align 1
+  %arrayinit.element255 = getelementptr inbounds i8, i8* %arrayinit.element254, i32 1
+  store i8 -3, i8* %arrayinit.element255, align 1
+  %arrayinit.element256 = getelementptr inbounds i8, i8* %arrayinit.element255, i32 1
+  store i8 1, i8* %arrayinit.element256, align 1
+  %arrayinit.element257 = getelementptr inbounds i8, i8* %arrayinit.element256, i32 1
+  store i8 3, i8* %arrayinit.element257, align 1
+  %arrayinit.element258 = getelementptr inbounds i8, i8* %arrayinit.element257, i32 1
+  store i8 92, i8* %arrayinit.element258, align 1
+  %arrayinit.element259 = getelementptr inbounds i8, i8* %arrayinit.element258, i32 1
+  store i8 4, i8* %arrayinit.element259, align 1
+  %arrayinit.element260 = getelementptr inbounds i8, i8* %arrayinit.element259, i32 1
+  store i8 64, i8* %arrayinit.element260, align 1
+  %arrayinit.element261 = getelementptr inbounds i8, i8* %arrayinit.element260, i32 1
+  store i8 65, i8* %arrayinit.element261, align 1
+  %arrayinit.element262 = getelementptr inbounds i8, i8* %arrayinit.element261, i32 1
+  store i8 0, i8* %arrayinit.element262, align 1
+  %arrayinit.element263 = getelementptr inbounds i8, i8* %arrayinit.element262, i32 1
+  store i8 15, i8* %arrayinit.element263, align 1
+  %arrayinit.element264 = getelementptr inbounds i8, i8* %arrayinit.element263, i32 1
+  store i8 11, i8* %arrayinit.element264, align 1
+  %arrayinit.element265 = getelementptr inbounds i8, i8* %arrayinit.element264, i32 1
+  store i8 65, i8* %arrayinit.element265, align 1
+  %arrayinit.element266 = getelementptr inbounds i8, i8* %arrayinit.element265, i32 1
+  store i8 1, i8* %arrayinit.element266, align 1
+  %arrayinit.element267 = getelementptr inbounds i8, i8* %arrayinit.element266, i32 1
+  store i8 15, i8* %arrayinit.element267, align 1
+  %arraydecay = getelementptr inbounds [269 x i8], [269 x i8]* %code, i32 0, i32 0
+  %arraydecay268 = getelementptr inbounds [269 x i8], [269 x i8]* %code, i32 0, i32 0
+  %add.ptr = getelementptr inbounds i8, i8* %arraydecay268, i32 269
+  call void @g(i8* %arraydecay, i8* %add.ptr)
+  ret void
+}
+
+declare void @g(i8*, i8*)
+
+attributes #1 = { noinline nounwind optnone ssp uwtable }

Modified: llvm/trunk/test/CodeGen/X86/pr32284.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32284.ll?rev=319142&r1=319141&r2=319142&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32284.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32284.ll Mon Nov 27 20:07:59 2017
@@ -185,8 +185,8 @@ define void @f1() {
 ; 686-O0-NEXT:    .cfi_def_cfa_offset 16
 ; 686-O0-NEXT:    pushl %esi
 ; 686-O0-NEXT:    .cfi_def_cfa_offset 20
-; 686-O0-NEXT:    subl $36, %esp
-; 686-O0-NEXT:    .cfi_def_cfa_offset 56
+; 686-O0-NEXT:    subl $24, %esp
+; 686-O0-NEXT:    .cfi_def_cfa_offset 44
 ; 686-O0-NEXT:    .cfi_offset %esi, -20
 ; 686-O0-NEXT:    .cfi_offset %edi, -16
 ; 686-O0-NEXT:    .cfi_offset %ebx, -12
@@ -194,44 +194,38 @@ define void @f1() {
 ; 686-O0-NEXT:    movl var_5, %eax
 ; 686-O0-NEXT:    movl %eax, %ecx
 ; 686-O0-NEXT:    sarl $31, %ecx
-; 686-O0-NEXT:    movl %eax, %edx
-; 686-O0-NEXT:    andl %ecx, %edx
+; 686-O0-NEXT:    xorl $208307499, %eax # imm = 0xC6A852B
+; 686-O0-NEXT:    xorl $-2, %ecx
+; 686-O0-NEXT:    orl %ecx, %eax
+; 686-O0-NEXT:    setne {{[0-9]+}}(%esp)
+; 686-O0-NEXT:    movl var_5, %ecx
+; 686-O0-NEXT:    movl %ecx, %edx
 ; 686-O0-NEXT:    subl $-1, %edx
 ; 686-O0-NEXT:    sete %bl
-; 686-O0-NEXT:    movl %eax, %esi
-; 686-O0-NEXT:    xorl $208307499, %esi # imm = 0xC6A852B
+; 686-O0-NEXT:    movzbl %bl, %esi
 ; 686-O0-NEXT:    movl %ecx, %edi
-; 686-O0-NEXT:    xorl $-2, %edi
-; 686-O0-NEXT:    orl %edi, %esi
-; 686-O0-NEXT:    setne {{[0-9]+}}(%esp)
-; 686-O0-NEXT:    movl %eax, %edi
-; 686-O0-NEXT:    subl $-1, %edi
-; 686-O0-NEXT:    sete %bh
-; 686-O0-NEXT:    movzbl %bh, %ebp
-; 686-O0-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; 686-O0-NEXT:    xorl %eax, %eax
-; 686-O0-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; 686-O0-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; 686-O0-NEXT:    addl $7093, %eax # imm = 0x1BB5
-; 686-O0-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; 686-O0-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; 686-O0-NEXT:    adcxl %eax, %ecx
-; 686-O0-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; 686-O0-NEXT:    subl %ebp, %eax
-; 686-O0-NEXT:    sbbl $0, %ecx
-; 686-O0-NEXT:    setl %bh
-; 686-O0-NEXT:    movzbl %bh, %ebp
-; 686-O0-NEXT:    movl %ebp, var_57
+; 686-O0-NEXT:    sarl $31, %edi
+; 686-O0-NEXT:    xorl %ebp, %ebp
+; 686-O0-NEXT:    addl $7093, %ecx # imm = 0x1BB5
+; 686-O0-NEXT:    adcxl %ebp, %edi
+; 686-O0-NEXT:    subl %esi, %ecx
+; 686-O0-NEXT:    sbbl $0, %edi
+; 686-O0-NEXT:    setl %bl
+; 686-O0-NEXT:    movzbl %bl, %esi
+; 686-O0-NEXT:    movl %esi, var_57
 ; 686-O0-NEXT:    movl $0, var_57+4
+; 686-O0-NEXT:    movl var_5, %esi
+; 686-O0-NEXT:    subl $-1, %esi
+; 686-O0-NEXT:    sete %bl
 ; 686-O0-NEXT:    movzbl %bl, %ebp
 ; 686-O0-NEXT:    movl %ebp, _ZN8struct_210member_2_0E
 ; 686-O0-NEXT:    movl $0, _ZN8struct_210member_2_0E+4
-; 686-O0-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; 686-O0-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; 686-O0-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; 686-O0-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; 686-O0-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; 686-O0-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; 686-O0-NEXT:    movl %esi, (%esp) # 4-byte Spill
-; 686-O0-NEXT:    addl $36, %esp
+; 686-O0-NEXT:    addl $24, %esp
 ; 686-O0-NEXT:    popl %esi
 ; 686-O0-NEXT:    popl %edi
 ; 686-O0-NEXT:    popl %ebx
@@ -523,11 +517,12 @@ define void @f3() #0 {
 ; 686-O0-NEXT:    andl %edi, %eax
 ; 686-O0-NEXT:    movb %al, %dl
 ; 686-O0-NEXT:    movzbl %dl, %eax
-; 686-O0-NEXT:    movl %ecx, %esi
-; 686-O0-NEXT:    orl %eax, %esi
-; 686-O0-NEXT:    movl %esi, (%esp)
+; 686-O0-NEXT:    orl %eax, %ecx
+; 686-O0-NEXT:    movl %ecx, (%esp)
 ; 686-O0-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; 686-O0-NEXT:    movl %ecx, var_46
+; 686-O0-NEXT:    movl var_13, %eax
+; 686-O0-NEXT:    notl %eax
+; 686-O0-NEXT:    movl %eax, var_46
 ; 686-O0-NEXT:    leal -8(%ebp), %esp
 ; 686-O0-NEXT:    popl %esi
 ; 686-O0-NEXT:    popl %edi

Modified: llvm/trunk/test/CodeGen/X86/pr32345.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32345.ll?rev=319142&r1=319141&r2=319142&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32345.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32345.ll Mon Nov 27 20:07:59 2017
@@ -58,9 +58,18 @@ define void @foo() {
 ; 6860-NEXT:    movw %cx, %di
 ; 6860-NEXT:    xorl %edx, %edi
 ; 6860-NEXT:    movw %di, %cx
-; 6860-NEXT:    movzwl %cx, %edi
-; 6860-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; 6860-NEXT:    movzwl %cx, %edx
+; 6860-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; 6860-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; 6860-NEXT:    movw var_22, %cx
+; 6860-NEXT:    movzwl var_27, %edx
+; 6860-NEXT:    movw %dx, %si
+; 6860-NEXT:    xorw %si, %cx
+; 6860-NEXT:    # implicit-def: %EDI
+; 6860-NEXT:    movw %cx, %di
+; 6860-NEXT:    xorl %edx, %edi
+; 6860-NEXT:    movw %di, %cx
+; 6860-NEXT:    movzwl %cx, %edi
 ; 6860-NEXT:    addl $-16610, %edx # imm = 0xBF1E
 ; 6860-NEXT:    movb %dl, %bl
 ; 6860-NEXT:    xorl %edx, %edx




More information about the llvm-commits mailing list