[llvm] r310183 - [SLPVectorizer] Add extra parameter to setInsertPointAfterBundle to handle different opcodes, NFCI.

Dinar Temirbulatov via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 5 11:43:52 PDT 2017


Author: dinar
Date: Sat Aug  5 11:43:52 2017
New Revision: 310183

URL: http://llvm.org/viewvc/llvm-project?rev=310183&view=rev
Log:
[SLPVectorizer] Add extra parameter to setInsertPointAfterBundle to handle different opcodes, NFCI.

Differential Revision: https://reviews.llvm.org/D35769

Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=310183&r1=310182&r2=310183&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sat Aug  5 11:43:52 2017
@@ -293,6 +293,26 @@ static bool isOdd(unsigned Value) {
   return Value & 1;
 }
 
+static bool sameOpcodeOrAlt(unsigned Opcode, unsigned AltOpcode,
+                            unsigned CheckedOpcode) {
+  return Opcode == CheckedOpcode || AltOpcode == CheckedOpcode;
+}
+
+/// Chooses the correct key for scheduling data. If \p Op has the same (or
+/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
+/// OpValue.
+static Value *isOneOf(Value *OpValue, Value *Op) {
+  auto *I = dyn_cast<Instruction>(Op);
+  if (!I)
+    return OpValue;
+  auto *OpInst = cast<Instruction>(OpValue);
+  unsigned OpInstOpcode = OpInst->getOpcode();
+  unsigned IOpcode = I->getOpcode();
+  if (sameOpcodeOrAlt(OpInstOpcode, getAltOpcode(OpInstOpcode), IOpcode))
+    return Op;
+  return OpValue;
+}
+
 ///\returns bool representing if Opcode \p Op can be part
 /// of an alternate sequence which can later be merged as
 /// a ShuffleVector instruction.
@@ -565,7 +585,7 @@ private:
 
   /// \brief Set the Builder insert point to one after the last instruction in
   /// the bundle
-  void setInsertPointAfterBundle(ArrayRef<Value *> VL);
+  void setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue);
 
   /// \returns a vector from a collection of scalars in \p VL.
   Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
@@ -751,9 +771,10 @@ private:
         : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr),
           NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0),
           Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps),
-          UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {}
+          UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false),
+          OpValue(nullptr) {}
 
-    void init(int BlockSchedulingRegionID) {
+    void init(int BlockSchedulingRegionID, Value *OpVal) {
       FirstInBundle = this;
       NextInBundle = nullptr;
       NextLoadStore = nullptr;
@@ -761,6 +782,7 @@ private:
       SchedulingRegionID = BlockSchedulingRegionID;
       UnscheduledDepsInBundle = UnscheduledDeps;
       clearDependencies();
+      OpValue = OpVal;
     }
 
     /// Returns true if the dependency information has been calculated.
@@ -865,6 +887,9 @@ private:
     /// True if this instruction is scheduled (or considered as scheduled in the
     /// dry-run).
     bool IsScheduled;
+
+    /// The value (typically the scalar instruction) this ScheduleData is keyed on.
+    Value *OpValue;
   };
 
 #ifndef NDEBUG
@@ -2478,14 +2503,18 @@ void BoUpSLP::reorderInputsAccordingToOp
   }
 }
 
-void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
+void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) {
 
   // Get the basic block this bundle is in. All instructions in the bundle
   // should be in this block.
-  auto *Front = cast<Instruction>(VL.front());
+  auto *Front = cast<Instruction>(OpValue);
   auto *BB = Front->getParent();
-  assert(all_of(make_range(VL.begin(), VL.end()), [&](Value *V) -> bool {
-    return cast<Instruction>(V)->getParent() == BB;
+  const unsigned Opcode = cast<Instruction>(OpValue)->getOpcode();
+  const unsigned AltOpcode = getAltOpcode(Opcode);
+  assert(all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
+    return !sameOpcodeOrAlt(Opcode, AltOpcode,
+                            cast<Instruction>(V)->getOpcode()) ||
+           cast<Instruction>(V)->getParent() == BB;
   }));
 
   // The last instruction in the bundle in program order.
@@ -2496,10 +2525,12 @@ void BoUpSLP::setInsertPointAfterBundle(
   // VL.back() and iterate over schedule data until we reach the end of the
   // bundle. The end of the bundle is marked by null ScheduleData.
   if (BlocksSchedules.count(BB)) {
-    auto *Bundle = BlocksSchedules[BB]->getScheduleData(VL.back());
+    auto *Bundle =
+        BlocksSchedules[BB]->getScheduleData(isOneOf(OpValue, VL.back()));
     if (Bundle && Bundle->isPartOfBundle())
       for (; Bundle; Bundle = Bundle->NextInBundle)
-        LastInst = Bundle->Inst;
+        if (Bundle->OpValue == Bundle->Inst)
+          LastInst = Bundle->Inst;
   }
 
   // LastInst can still be null at this point if there's either not an entry
@@ -2523,7 +2554,7 @@ void BoUpSLP::setInsertPointAfterBundle(
   if (!LastInst) {
     SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
     for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
-      if (Bundle.erase(&I))
+      if (Bundle.erase(&I) && sameOpcodeOrAlt(Opcode, AltOpcode, I.getOpcode()))
         LastInst = &I;
       if (Bundle.empty())
         break;
@@ -2601,7 +2632,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
   VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
 
   if (E->NeedToGather) {
-    setInsertPointAfterBundle(E->Scalars);
+    setInsertPointAfterBundle(E->Scalars, VL0);
     auto *V = Gather(E->Scalars, VecTy);
     E->VectorizedValue = V;
     return V;
@@ -2651,7 +2682,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         E->VectorizedValue = V;
         return V;
       }
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
       auto *V = Gather(E->Scalars, VecTy);
       E->VectorizedValue = V;
       return V;
@@ -2666,7 +2697,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         E->VectorizedValue = V;
         return propagateMetadata(V, E->Scalars);
       }
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
       auto *V = Gather(E->Scalars, VecTy);
       E->VectorizedValue = V;
       return V;
@@ -2687,7 +2718,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       for (Value *V : E->Scalars)
         INVL.push_back(cast<Instruction>(V)->getOperand(0));
 
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *InVec = vectorizeTree(INVL);
 
@@ -2708,7 +2739,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         RHSV.push_back(cast<Instruction>(V)->getOperand(1));
       }
 
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *L = vectorizeTree(LHSV);
       Value *R = vectorizeTree(RHSV);
@@ -2736,7 +2767,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
       }
 
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *Cond = vectorizeTree(CondVec);
       Value *True = vectorizeTree(TrueVec);
@@ -2777,7 +2808,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
           RHSVL.push_back(cast<Instruction>(V)->getOperand(1));
         }
 
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *LHS = vectorizeTree(LHSVL);
       Value *RHS = vectorizeTree(RHSVL);
@@ -2799,7 +2830,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
     case Instruction::Load: {
       // Loads are inserted at the head of the tree because we don't want to
       // sink them all the way down past store instructions.
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       LoadInst *LI = cast<LoadInst>(VL0);
       Type *ScalarLoadTy = LI->getType();
@@ -2834,7 +2865,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       for (Value *V : E->Scalars)
         ValueOp.push_back(cast<StoreInst>(V)->getValueOperand());
 
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *VecValue = vectorizeTree(ValueOp);
       Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
@@ -2857,7 +2888,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       return propagateMetadata(S, E->Scalars);
     }
     case Instruction::GetElementPtr: {
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       ValueList Op0VL;
       for (Value *V : E->Scalars)
@@ -2888,7 +2919,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
     }
     case Instruction::Call: {
       CallInst *CI = cast<CallInst>(VL0);
-      setInsertPointAfterBundle(VL0);
+      setInsertPointAfterBundle(E->Scalars, VL0);
       Function *FI;
       Intrinsic::ID IID  = Intrinsic::not_intrinsic;
       Value *ScalarArg = nullptr;
@@ -2939,7 +2970,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       ValueList LHSVL, RHSVL;
       assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
       reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
-      setInsertPointAfterBundle(E->Scalars);
+      setInsertPointAfterBundle(E->Scalars, VL0);
 
       Value *LHS = vectorizeTree(LHSVL);
       Value *RHS = vectorizeTree(RHSVL);
@@ -3421,7 +3452,7 @@ void BoUpSLP::BlockScheduling::initSched
     }
     assert(!isInSchedulingRegion(SD) &&
            "new ScheduleData already in scheduling region");
-    SD->init(SchedulingRegionID);
+    SD->init(SchedulingRegionID, I);
 
     if (I->mayReadOrWriteMemory()) {
       // Update the linked list of memory accessing instructions.

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll?rev=310183&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll Sat Aug  5 11:43:52 2017
@@ -0,0 +1,708 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define zeroext i8 @foo(i32 %x, i32 %y, i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[B_A:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[A:%.*]]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = trunc i32 [[B_A]] to i8
+; CHECK-NEXT:    ret i8 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp slt i32 %x, %y
+  %b.a = select i1 %cmp, i32 %b, i32 %a
+  %retval.0 = trunc i32 %b.a to i8
+  ret i8 %retval.0
+}
+
+define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i8* noalias nocapture readonly %d, i8* noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> undef, i32 [[W:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[W]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[W]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[W]], i32 6
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[W]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[W]], i32 8
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[W]], i32 9
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[W]], i32 10
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[W]], i32 11
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[W]], i32 12
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[W]], i32 13
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[W]], i32 14
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[W]], i32 15
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
+; CHECK-NEXT:    [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
+; CHECK-NEXT:    [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
+; CHECK-NEXT:    [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
+; CHECK-NEXT:    [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
+; CHECK-NEXT:    [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
+; CHECK-NEXT:    [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
+; CHECK-NEXT:    [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
+; CHECK-NEXT:    [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
+; CHECK-NEXT:    [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
+; CHECK-NEXT:    [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
+; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
+; CHECK-NEXT:    [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
+; CHECK-NEXT:    [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
+; CHECK-NEXT:    [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
+; CHECK-NEXT:    [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
+; CHECK-NEXT:    [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
+; CHECK-NEXT:    [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
+; CHECK-NEXT:    [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
+; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
+; CHECK-NEXT:    [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1, !tbaa !0
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]]
+; CHECK-NEXT:    [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32>
+; CHECK-NEXT:    [[TMP27:%.*]] = mul <16 x i32> [[TMP15]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8>
+; CHECK-NEXT:    [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
+; CHECK-NEXT:    [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
+; CHECK-NEXT:    store <16 x i8> [[TMP28]], <16 x i8>* [[TMP29]], align 1, !tbaa !0
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
+; CHECK-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
+; CHECK-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
+; CHECK-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
+; CHECK-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0356 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.addr.0355 = phi i8* [ %a, %entry ], [ %add.ptr, %for.body ]
+  %e.addr.0354 = phi i8* [ %e, %entry ], [ %add.ptr192, %for.body ]
+  %d.addr.0353 = phi i8* [ %d, %entry ], [ %add.ptr191, %for.body ]
+  %c.addr.0352 = phi i8* [ %c, %entry ], [ %add.ptr190, %for.body ]
+  %b.addr.0351 = phi i8* [ %b, %entry ], [ %add.ptr189, %for.body ]
+  %0 = load i8, i8* %c.addr.0352, align 1, !tbaa !2
+  %1 = load i8, i8* %d.addr.0353, align 1, !tbaa !2
+  %2 = load i8, i8* %a.addr.0355, align 1, !tbaa !2
+  %3 = load i8, i8* %b.addr.0351, align 1, !tbaa !2
+  %cmp.i = icmp ult i8 %0, %1
+  %b.a.i.v.v = select i1 %cmp.i, i8 %3, i8 %2
+  %b.a.i.v = zext i8 %b.a.i.v.v to i32
+  %b.a.i = mul i32 %b.a.i.v, %w
+  %retval.0.i = trunc i32 %b.a.i to i8
+  store i8 %retval.0.i, i8* %e.addr.0354, align 1, !tbaa !2
+  %arrayidx9 = getelementptr inbounds i8, i8* %c.addr.0352, i64 1
+  %4 = load i8, i8* %arrayidx9, align 1, !tbaa !2
+  %arrayidx11 = getelementptr inbounds i8, i8* %d.addr.0353, i64 1
+  %5 = load i8, i8* %arrayidx11, align 1, !tbaa !2
+  %arrayidx13 = getelementptr inbounds i8, i8* %a.addr.0355, i64 1
+  %6 = load i8, i8* %arrayidx13, align 1, !tbaa !2
+  %arrayidx16 = getelementptr inbounds i8, i8* %b.addr.0351, i64 1
+  %7 = load i8, i8* %arrayidx16, align 1, !tbaa !2
+  %cmp.i348 = icmp ult i8 %4, %5
+  %b.a.i349.v.v = select i1 %cmp.i348, i8 %7, i8 %6
+  %b.a.i349.v = zext i8 %b.a.i349.v.v to i32
+  %b.a.i349 = mul i32 %b.a.i349.v, %w
+  %retval.0.i350 = trunc i32 %b.a.i349 to i8
+  %arrayidx20 = getelementptr inbounds i8, i8* %e.addr.0354, i64 1
+  store i8 %retval.0.i350, i8* %arrayidx20, align 1, !tbaa !2
+  %arrayidx21 = getelementptr inbounds i8, i8* %c.addr.0352, i64 2
+  %8 = load i8, i8* %arrayidx21, align 1, !tbaa !2
+  %arrayidx23 = getelementptr inbounds i8, i8* %d.addr.0353, i64 2
+  %9 = load i8, i8* %arrayidx23, align 1, !tbaa !2
+  %arrayidx25 = getelementptr inbounds i8, i8* %a.addr.0355, i64 2
+  %10 = load i8, i8* %arrayidx25, align 1, !tbaa !2
+  %arrayidx28 = getelementptr inbounds i8, i8* %b.addr.0351, i64 2
+  %11 = load i8, i8* %arrayidx28, align 1, !tbaa !2
+  %cmp.i345 = icmp ult i8 %8, %9
+  %b.a.i346.v.v = select i1 %cmp.i345, i8 %11, i8 %10
+  %b.a.i346.v = zext i8 %b.a.i346.v.v to i32
+  %b.a.i346 = mul i32 %b.a.i346.v, %w
+  %retval.0.i347 = trunc i32 %b.a.i346 to i8
+  %arrayidx32 = getelementptr inbounds i8, i8* %e.addr.0354, i64 2
+  store i8 %retval.0.i347, i8* %arrayidx32, align 1, !tbaa !2
+  %arrayidx33 = getelementptr inbounds i8, i8* %c.addr.0352, i64 3
+  %12 = load i8, i8* %arrayidx33, align 1, !tbaa !2
+  %arrayidx35 = getelementptr inbounds i8, i8* %d.addr.0353, i64 3
+  %13 = load i8, i8* %arrayidx35, align 1, !tbaa !2
+  %arrayidx37 = getelementptr inbounds i8, i8* %a.addr.0355, i64 3
+  %14 = load i8, i8* %arrayidx37, align 1, !tbaa !2
+  %arrayidx40 = getelementptr inbounds i8, i8* %b.addr.0351, i64 3
+  %15 = load i8, i8* %arrayidx40, align 1, !tbaa !2
+  %cmp.i342 = icmp ult i8 %12, %13
+  %b.a.i343.v.v = select i1 %cmp.i342, i8 %15, i8 %14
+  %b.a.i343.v = zext i8 %b.a.i343.v.v to i32
+  %b.a.i343 = mul i32 %b.a.i343.v, %w
+  %retval.0.i344 = trunc i32 %b.a.i343 to i8
+  %arrayidx44 = getelementptr inbounds i8, i8* %e.addr.0354, i64 3
+  store i8 %retval.0.i344, i8* %arrayidx44, align 1, !tbaa !2
+  %arrayidx45 = getelementptr inbounds i8, i8* %c.addr.0352, i64 4
+  %16 = load i8, i8* %arrayidx45, align 1, !tbaa !2
+  %arrayidx47 = getelementptr inbounds i8, i8* %d.addr.0353, i64 4
+  %17 = load i8, i8* %arrayidx47, align 1, !tbaa !2
+  %arrayidx49 = getelementptr inbounds i8, i8* %a.addr.0355, i64 4
+  %18 = load i8, i8* %arrayidx49, align 1, !tbaa !2
+  %arrayidx52 = getelementptr inbounds i8, i8* %b.addr.0351, i64 4
+  %19 = load i8, i8* %arrayidx52, align 1, !tbaa !2
+  %cmp.i339 = icmp ult i8 %16, %17
+  %b.a.i340.v.v = select i1 %cmp.i339, i8 %19, i8 %18
+  %b.a.i340.v = zext i8 %b.a.i340.v.v to i32
+  %b.a.i340 = mul i32 %b.a.i340.v, %w
+  %retval.0.i341 = trunc i32 %b.a.i340 to i8
+  %arrayidx56 = getelementptr inbounds i8, i8* %e.addr.0354, i64 4
+  store i8 %retval.0.i341, i8* %arrayidx56, align 1, !tbaa !2
+  %arrayidx57 = getelementptr inbounds i8, i8* %c.addr.0352, i64 5
+  %20 = load i8, i8* %arrayidx57, align 1, !tbaa !2
+  %arrayidx59 = getelementptr inbounds i8, i8* %d.addr.0353, i64 5
+  %21 = load i8, i8* %arrayidx59, align 1, !tbaa !2
+  %arrayidx61 = getelementptr inbounds i8, i8* %a.addr.0355, i64 5
+  %22 = load i8, i8* %arrayidx61, align 1, !tbaa !2
+  %arrayidx64 = getelementptr inbounds i8, i8* %b.addr.0351, i64 5
+  %23 = load i8, i8* %arrayidx64, align 1, !tbaa !2
+  %cmp.i336 = icmp ult i8 %20, %21
+  %b.a.i337.v.v = select i1 %cmp.i336, i8 %23, i8 %22
+  %b.a.i337.v = zext i8 %b.a.i337.v.v to i32
+  %b.a.i337 = mul i32 %b.a.i337.v, %w
+  %retval.0.i338 = trunc i32 %b.a.i337 to i8
+  %arrayidx68 = getelementptr inbounds i8, i8* %e.addr.0354, i64 5
+  store i8 %retval.0.i338, i8* %arrayidx68, align 1, !tbaa !2
+  %arrayidx69 = getelementptr inbounds i8, i8* %c.addr.0352, i64 6
+  %24 = load i8, i8* %arrayidx69, align 1, !tbaa !2
+  %arrayidx71 = getelementptr inbounds i8, i8* %d.addr.0353, i64 6
+  %25 = load i8, i8* %arrayidx71, align 1, !tbaa !2
+  %arrayidx73 = getelementptr inbounds i8, i8* %a.addr.0355, i64 6
+  %26 = load i8, i8* %arrayidx73, align 1, !tbaa !2
+  %arrayidx76 = getelementptr inbounds i8, i8* %b.addr.0351, i64 6
+  %27 = load i8, i8* %arrayidx76, align 1, !tbaa !2
+  %cmp.i333 = icmp ult i8 %24, %25
+  %b.a.i334.v.v = select i1 %cmp.i333, i8 %27, i8 %26
+  %b.a.i334.v = zext i8 %b.a.i334.v.v to i32
+  %b.a.i334 = mul i32 %b.a.i334.v, %w
+  %retval.0.i335 = trunc i32 %b.a.i334 to i8
+  %arrayidx80 = getelementptr inbounds i8, i8* %e.addr.0354, i64 6
+  store i8 %retval.0.i335, i8* %arrayidx80, align 1, !tbaa !2
+  %arrayidx81 = getelementptr inbounds i8, i8* %c.addr.0352, i64 7
+  %28 = load i8, i8* %arrayidx81, align 1, !tbaa !2
+  %arrayidx83 = getelementptr inbounds i8, i8* %d.addr.0353, i64 7
+  %29 = load i8, i8* %arrayidx83, align 1, !tbaa !2
+  %arrayidx85 = getelementptr inbounds i8, i8* %a.addr.0355, i64 7
+  %30 = load i8, i8* %arrayidx85, align 1, !tbaa !2
+  %arrayidx88 = getelementptr inbounds i8, i8* %b.addr.0351, i64 7
+  %31 = load i8, i8* %arrayidx88, align 1, !tbaa !2
+  %cmp.i330 = icmp ult i8 %28, %29
+  %b.a.i331.v.v = select i1 %cmp.i330, i8 %31, i8 %30
+  %b.a.i331.v = zext i8 %b.a.i331.v.v to i32
+  %b.a.i331 = mul i32 %b.a.i331.v, %w
+  %retval.0.i332 = trunc i32 %b.a.i331 to i8
+  %arrayidx92 = getelementptr inbounds i8, i8* %e.addr.0354, i64 7
+  store i8 %retval.0.i332, i8* %arrayidx92, align 1, !tbaa !2
+  %arrayidx93 = getelementptr inbounds i8, i8* %c.addr.0352, i64 8
+  %32 = load i8, i8* %arrayidx93, align 1, !tbaa !2
+  %arrayidx95 = getelementptr inbounds i8, i8* %d.addr.0353, i64 8
+  %33 = load i8, i8* %arrayidx95, align 1, !tbaa !2
+  %arrayidx97 = getelementptr inbounds i8, i8* %a.addr.0355, i64 8
+  %34 = load i8, i8* %arrayidx97, align 1, !tbaa !2
+  %arrayidx100 = getelementptr inbounds i8, i8* %b.addr.0351, i64 8
+  %35 = load i8, i8* %arrayidx100, align 1, !tbaa !2
+  %cmp.i327 = icmp ult i8 %32, %33
+  %b.a.i328.v.v = select i1 %cmp.i327, i8 %35, i8 %34
+  %b.a.i328.v = zext i8 %b.a.i328.v.v to i32
+  %b.a.i328 = mul i32 %b.a.i328.v, %w
+  %retval.0.i329 = trunc i32 %b.a.i328 to i8
+  %arrayidx104 = getelementptr inbounds i8, i8* %e.addr.0354, i64 8
+  store i8 %retval.0.i329, i8* %arrayidx104, align 1, !tbaa !2
+  %arrayidx105 = getelementptr inbounds i8, i8* %c.addr.0352, i64 9
+  %36 = load i8, i8* %arrayidx105, align 1, !tbaa !2
+  %arrayidx107 = getelementptr inbounds i8, i8* %d.addr.0353, i64 9
+  %37 = load i8, i8* %arrayidx107, align 1, !tbaa !2
+  %arrayidx109 = getelementptr inbounds i8, i8* %a.addr.0355, i64 9
+  %38 = load i8, i8* %arrayidx109, align 1, !tbaa !2
+  %arrayidx112 = getelementptr inbounds i8, i8* %b.addr.0351, i64 9
+  %39 = load i8, i8* %arrayidx112, align 1, !tbaa !2
+  %cmp.i324 = icmp ult i8 %36, %37
+  %b.a.i325.v.v = select i1 %cmp.i324, i8 %39, i8 %38
+  %b.a.i325.v = zext i8 %b.a.i325.v.v to i32
+  %b.a.i325 = mul i32 %b.a.i325.v, %w
+  %retval.0.i326 = trunc i32 %b.a.i325 to i8
+  %arrayidx116 = getelementptr inbounds i8, i8* %e.addr.0354, i64 9
+  store i8 %retval.0.i326, i8* %arrayidx116, align 1, !tbaa !2
+  %arrayidx117 = getelementptr inbounds i8, i8* %c.addr.0352, i64 10
+  %40 = load i8, i8* %arrayidx117, align 1, !tbaa !2
+  %arrayidx119 = getelementptr inbounds i8, i8* %d.addr.0353, i64 10
+  %41 = load i8, i8* %arrayidx119, align 1, !tbaa !2
+  %arrayidx121 = getelementptr inbounds i8, i8* %a.addr.0355, i64 10
+  %42 = load i8, i8* %arrayidx121, align 1, !tbaa !2
+  %arrayidx124 = getelementptr inbounds i8, i8* %b.addr.0351, i64 10
+  %43 = load i8, i8* %arrayidx124, align 1, !tbaa !2
+  %cmp.i321 = icmp ult i8 %40, %41
+  %b.a.i322.v.v = select i1 %cmp.i321, i8 %43, i8 %42
+  %b.a.i322.v = zext i8 %b.a.i322.v.v to i32
+  %b.a.i322 = mul i32 %b.a.i322.v, %w
+  %retval.0.i323 = trunc i32 %b.a.i322 to i8
+  %arrayidx128 = getelementptr inbounds i8, i8* %e.addr.0354, i64 10
+  store i8 %retval.0.i323, i8* %arrayidx128, align 1, !tbaa !2
+  %arrayidx129 = getelementptr inbounds i8, i8* %c.addr.0352, i64 11
+  %44 = load i8, i8* %arrayidx129, align 1, !tbaa !2
+  %arrayidx131 = getelementptr inbounds i8, i8* %d.addr.0353, i64 11
+  %45 = load i8, i8* %arrayidx131, align 1, !tbaa !2
+  %arrayidx133 = getelementptr inbounds i8, i8* %a.addr.0355, i64 11
+  %46 = load i8, i8* %arrayidx133, align 1, !tbaa !2
+  %arrayidx136 = getelementptr inbounds i8, i8* %b.addr.0351, i64 11
+  %47 = load i8, i8* %arrayidx136, align 1, !tbaa !2
+  %cmp.i318 = icmp ult i8 %44, %45
+  %b.a.i319.v.v = select i1 %cmp.i318, i8 %47, i8 %46
+  %b.a.i319.v = zext i8 %b.a.i319.v.v to i32
+  %b.a.i319 = mul i32 %b.a.i319.v, %w
+  %retval.0.i320 = trunc i32 %b.a.i319 to i8
+  %arrayidx140 = getelementptr inbounds i8, i8* %e.addr.0354, i64 11
+  store i8 %retval.0.i320, i8* %arrayidx140, align 1, !tbaa !2
+  %arrayidx141 = getelementptr inbounds i8, i8* %c.addr.0352, i64 12
+  %48 = load i8, i8* %arrayidx141, align 1, !tbaa !2
+  %arrayidx143 = getelementptr inbounds i8, i8* %d.addr.0353, i64 12
+  %49 = load i8, i8* %arrayidx143, align 1, !tbaa !2
+  %arrayidx145 = getelementptr inbounds i8, i8* %a.addr.0355, i64 12
+  %50 = load i8, i8* %arrayidx145, align 1, !tbaa !2
+  %arrayidx148 = getelementptr inbounds i8, i8* %b.addr.0351, i64 12
+  %51 = load i8, i8* %arrayidx148, align 1, !tbaa !2
+  %cmp.i315 = icmp ult i8 %48, %49
+  %b.a.i316.v.v = select i1 %cmp.i315, i8 %51, i8 %50
+  %b.a.i316.v = zext i8 %b.a.i316.v.v to i32
+  %b.a.i316 = mul i32 %b.a.i316.v, %w
+  %retval.0.i317 = trunc i32 %b.a.i316 to i8
+  %arrayidx152 = getelementptr inbounds i8, i8* %e.addr.0354, i64 12
+  store i8 %retval.0.i317, i8* %arrayidx152, align 1, !tbaa !2
+  %arrayidx153 = getelementptr inbounds i8, i8* %c.addr.0352, i64 13
+  %52 = load i8, i8* %arrayidx153, align 1, !tbaa !2
+  %arrayidx155 = getelementptr inbounds i8, i8* %d.addr.0353, i64 13
+  %53 = load i8, i8* %arrayidx155, align 1, !tbaa !2
+  %arrayidx157 = getelementptr inbounds i8, i8* %a.addr.0355, i64 13
+  %54 = load i8, i8* %arrayidx157, align 1, !tbaa !2
+  %arrayidx160 = getelementptr inbounds i8, i8* %b.addr.0351, i64 13
+  %55 = load i8, i8* %arrayidx160, align 1, !tbaa !2
+  %cmp.i312 = icmp ult i8 %52, %53
+  %b.a.i313.v.v = select i1 %cmp.i312, i8 %55, i8 %54
+  %b.a.i313.v = zext i8 %b.a.i313.v.v to i32
+  %b.a.i313 = mul i32 %b.a.i313.v, %w
+  %retval.0.i314 = trunc i32 %b.a.i313 to i8
+  %arrayidx164 = getelementptr inbounds i8, i8* %e.addr.0354, i64 13
+  store i8 %retval.0.i314, i8* %arrayidx164, align 1, !tbaa !2
+  %arrayidx165 = getelementptr inbounds i8, i8* %c.addr.0352, i64 14
+  %56 = load i8, i8* %arrayidx165, align 1, !tbaa !2
+  %arrayidx167 = getelementptr inbounds i8, i8* %d.addr.0353, i64 14
+  %57 = load i8, i8* %arrayidx167, align 1, !tbaa !2
+  %arrayidx169 = getelementptr inbounds i8, i8* %a.addr.0355, i64 14
+  %58 = load i8, i8* %arrayidx169, align 1, !tbaa !2
+  %arrayidx172 = getelementptr inbounds i8, i8* %b.addr.0351, i64 14
+  %59 = load i8, i8* %arrayidx172, align 1, !tbaa !2
+  %cmp.i309 = icmp ult i8 %56, %57
+  %b.a.i310.v.v = select i1 %cmp.i309, i8 %59, i8 %58
+  %b.a.i310.v = zext i8 %b.a.i310.v.v to i32
+  %b.a.i310 = mul i32 %b.a.i310.v, %w
+  %retval.0.i311 = trunc i32 %b.a.i310 to i8
+  %arrayidx176 = getelementptr inbounds i8, i8* %e.addr.0354, i64 14
+  store i8 %retval.0.i311, i8* %arrayidx176, align 1, !tbaa !2
+  %arrayidx177 = getelementptr inbounds i8, i8* %c.addr.0352, i64 15
+  %60 = load i8, i8* %arrayidx177, align 1, !tbaa !2
+  %arrayidx179 = getelementptr inbounds i8, i8* %d.addr.0353, i64 15
+  %61 = load i8, i8* %arrayidx179, align 1, !tbaa !2
+  %arrayidx181 = getelementptr inbounds i8, i8* %a.addr.0355, i64 15
+  %62 = load i8, i8* %arrayidx181, align 1, !tbaa !2
+  %arrayidx184 = getelementptr inbounds i8, i8* %b.addr.0351, i64 15
+  %63 = load i8, i8* %arrayidx184, align 1, !tbaa !2
+  %cmp.i306 = icmp ult i8 %60, %61
+  %b.a.i307.v.v = select i1 %cmp.i306, i8 %63, i8 %62
+  %b.a.i307.v = zext i8 %b.a.i307.v.v to i32
+  %b.a.i307 = mul i32 %b.a.i307.v, %w
+  %retval.0.i308 = trunc i32 %b.a.i307 to i8
+  %arrayidx188 = getelementptr inbounds i8, i8* %e.addr.0354, i64 15
+  store i8 %retval.0.i308, i8* %arrayidx188, align 1, !tbaa !2
+  %inc = add nuw nsw i32 %i.0356, 1
+  %add.ptr = getelementptr inbounds i8, i8* %a.addr.0355, i64 16
+  %add.ptr189 = getelementptr inbounds i8, i8* %b.addr.0351, i64 16
+  %add.ptr190 = getelementptr inbounds i8, i8* %c.addr.0352, i64 16
+  %add.ptr191 = getelementptr inbounds i8, i8* %d.addr.0353, i64 16
+  %add.ptr192 = getelementptr inbounds i8, i8* %e.addr.0354, i64 16
+  %exitcond = icmp eq i32 %inc, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+ at ib = local_unnamed_addr global [64 x i32] [i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0], align 16
+ at ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16
+
+; @foo1: fully-unrolled scalar loop that stores ~ib[i] into ia[i] for all 64
+; elements, then re-checks each element in a rotated (for.body5 before
+; for.cond3) loop and calls @abort on mismatch. The CHECK lines (autogenerated
+; FileCheck annotations) require the SLP vectorizer to collapse the 64 scalar
+; load/xor/store triples into sixteen aligned <4 x i32> load/xor/store groups,
+; while leaving the scalar verification loop untouched.
+define i32 @foo1() local_unnamed_addr #0 {
+; CHECK-LABEL: @foo1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP0]]
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP2]]
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP4]]
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP6]]
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP8]]
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP11:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP10]]
+; CHECK-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP12]]
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP14]]
+; CHECK-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP16]]
+; CHECK-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP19:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP18]]
+; CHECK-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP21:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP20]]
+; CHECK-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP23:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP22]]
+; CHECK-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP25:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP24]]
+; CHECK-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP27:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP26]]
+; CHECK-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP29:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP28]]
+; CHECK-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    [[TMP31:%.*]] = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[TMP30]]
+; CHECK-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16, !tbaa !0
+; CHECK-NEXT:    br label [[FOR_BODY5:%.*]]
+; CHECK:       for.cond3:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; CHECK:       for.body5:
+; CHECK-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !tbaa !0
+; CHECK-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @abort()
+; CHECK-NEXT:    unreachable
+; CHECK:       for.end14:
+; CHECK-NEXT:    ret i32 0
+;
+; 64 fully-unrolled scalar triples: ia[i] = ~ib[i]; alignment cycles 16/4/8/4
+; with the element offset, so each group of four forms one aligned vector op.
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16, !tbaa !2
+  %neg = xor i32 %0, -1
+  store i32 %neg, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 0), align 16, !tbaa !2
+  %1 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 1), align 4, !tbaa !2
+  %neg.1 = xor i32 %1, -1
+  store i32 %neg.1, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 1), align 4, !tbaa !2
+  %2 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 2), align 8, !tbaa !2
+  %neg.2 = xor i32 %2, -1
+  store i32 %neg.2, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 2), align 8, !tbaa !2
+  %3 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 3), align 4, !tbaa !2
+  %neg.3 = xor i32 %3, -1
+  store i32 %neg.3, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 3), align 4, !tbaa !2
+  %4 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4), align 16, !tbaa !2
+  %neg.4 = xor i32 %4, -1
+  store i32 %neg.4, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4), align 16, !tbaa !2
+  %5 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 5), align 4, !tbaa !2
+  %neg.5 = xor i32 %5, -1
+  store i32 %neg.5, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 5), align 4, !tbaa !2
+  %6 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 6), align 8, !tbaa !2
+  %neg.6 = xor i32 %6, -1
+  store i32 %neg.6, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 6), align 8, !tbaa !2
+  %7 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 7), align 4, !tbaa !2
+  %neg.7 = xor i32 %7, -1
+  store i32 %neg.7, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 7), align 4, !tbaa !2
+  %8 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8), align 16, !tbaa !2
+  %neg.8 = xor i32 %8, -1
+  store i32 %neg.8, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8), align 16, !tbaa !2
+  %9 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 9), align 4, !tbaa !2
+  %neg.9 = xor i32 %9, -1
+  store i32 %neg.9, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 9), align 4, !tbaa !2
+  %10 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 10), align 8, !tbaa !2
+  %neg.10 = xor i32 %10, -1
+  store i32 %neg.10, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 10), align 8, !tbaa !2
+  %11 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 11), align 4, !tbaa !2
+  %neg.11 = xor i32 %11, -1
+  store i32 %neg.11, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 11), align 4, !tbaa !2
+  %12 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12), align 16, !tbaa !2
+  %neg.12 = xor i32 %12, -1
+  store i32 %neg.12, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12), align 16, !tbaa !2
+  %13 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 13), align 4, !tbaa !2
+  %neg.13 = xor i32 %13, -1
+  store i32 %neg.13, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 13), align 4, !tbaa !2
+  %14 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 14), align 8, !tbaa !2
+  %neg.14 = xor i32 %14, -1
+  store i32 %neg.14, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 14), align 8, !tbaa !2
+  %15 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 15), align 4, !tbaa !2
+  %neg.15 = xor i32 %15, -1
+  store i32 %neg.15, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 15), align 4, !tbaa !2
+  %16 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16), align 16, !tbaa !2
+  %neg.16 = xor i32 %16, -1
+  store i32 %neg.16, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16), align 16, !tbaa !2
+  %17 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 17), align 4, !tbaa !2
+  %neg.17 = xor i32 %17, -1
+  store i32 %neg.17, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 17), align 4, !tbaa !2
+  %18 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 18), align 8, !tbaa !2
+  %neg.18 = xor i32 %18, -1
+  store i32 %neg.18, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 18), align 8, !tbaa !2
+  %19 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 19), align 4, !tbaa !2
+  %neg.19 = xor i32 %19, -1
+  store i32 %neg.19, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 19), align 4, !tbaa !2
+  %20 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20), align 16, !tbaa !2
+  %neg.20 = xor i32 %20, -1
+  store i32 %neg.20, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20), align 16, !tbaa !2
+  %21 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 21), align 4, !tbaa !2
+  %neg.21 = xor i32 %21, -1
+  store i32 %neg.21, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 21), align 4, !tbaa !2
+  %22 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 22), align 8, !tbaa !2
+  %neg.22 = xor i32 %22, -1
+  store i32 %neg.22, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 22), align 8, !tbaa !2
+  %23 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 23), align 4, !tbaa !2
+  %neg.23 = xor i32 %23, -1
+  store i32 %neg.23, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 23), align 4, !tbaa !2
+  %24 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24), align 16, !tbaa !2
+  %neg.24 = xor i32 %24, -1
+  store i32 %neg.24, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24), align 16, !tbaa !2
+  %25 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 25), align 4, !tbaa !2
+  %neg.25 = xor i32 %25, -1
+  store i32 %neg.25, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 25), align 4, !tbaa !2
+  %26 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 26), align 8, !tbaa !2
+  %neg.26 = xor i32 %26, -1
+  store i32 %neg.26, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 26), align 8, !tbaa !2
+  %27 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 27), align 4, !tbaa !2
+  %neg.27 = xor i32 %27, -1
+  store i32 %neg.27, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 27), align 4, !tbaa !2
+  %28 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28), align 16, !tbaa !2
+  %neg.28 = xor i32 %28, -1
+  store i32 %neg.28, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28), align 16, !tbaa !2
+  %29 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 29), align 4, !tbaa !2
+  %neg.29 = xor i32 %29, -1
+  store i32 %neg.29, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 29), align 4, !tbaa !2
+  %30 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 30), align 8, !tbaa !2
+  %neg.30 = xor i32 %30, -1
+  store i32 %neg.30, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 30), align 8, !tbaa !2
+  %31 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 31), align 4, !tbaa !2
+  %neg.31 = xor i32 %31, -1
+  store i32 %neg.31, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 31), align 4, !tbaa !2
+  %32 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32), align 16, !tbaa !2
+  %neg.32 = xor i32 %32, -1
+  store i32 %neg.32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32), align 16, !tbaa !2
+  %33 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 33), align 4, !tbaa !2
+  %neg.33 = xor i32 %33, -1
+  store i32 %neg.33, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 33), align 4, !tbaa !2
+  %34 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 34), align 8, !tbaa !2
+  %neg.34 = xor i32 %34, -1
+  store i32 %neg.34, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 34), align 8, !tbaa !2
+  %35 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 35), align 4, !tbaa !2
+  %neg.35 = xor i32 %35, -1
+  store i32 %neg.35, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 35), align 4, !tbaa !2
+  %36 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36), align 16, !tbaa !2
+  %neg.36 = xor i32 %36, -1
+  store i32 %neg.36, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36), align 16, !tbaa !2
+  %37 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 37), align 4, !tbaa !2
+  %neg.37 = xor i32 %37, -1
+  store i32 %neg.37, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 37), align 4, !tbaa !2
+  %38 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 38), align 8, !tbaa !2
+  %neg.38 = xor i32 %38, -1
+  store i32 %neg.38, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 38), align 8, !tbaa !2
+  %39 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 39), align 4, !tbaa !2
+  %neg.39 = xor i32 %39, -1
+  store i32 %neg.39, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 39), align 4, !tbaa !2
+  %40 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40), align 16, !tbaa !2
+  %neg.40 = xor i32 %40, -1
+  store i32 %neg.40, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40), align 16, !tbaa !2
+  %41 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 41), align 4, !tbaa !2
+  %neg.41 = xor i32 %41, -1
+  store i32 %neg.41, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 41), align 4, !tbaa !2
+  %42 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 42), align 8, !tbaa !2
+  %neg.42 = xor i32 %42, -1
+  store i32 %neg.42, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 42), align 8, !tbaa !2
+  %43 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 43), align 4, !tbaa !2
+  %neg.43 = xor i32 %43, -1
+  store i32 %neg.43, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 43), align 4, !tbaa !2
+  %44 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44), align 16, !tbaa !2
+  %neg.44 = xor i32 %44, -1
+  store i32 %neg.44, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44), align 16, !tbaa !2
+  %45 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 45), align 4, !tbaa !2
+  %neg.45 = xor i32 %45, -1
+  store i32 %neg.45, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 45), align 4, !tbaa !2
+  %46 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 46), align 8, !tbaa !2
+  %neg.46 = xor i32 %46, -1
+  store i32 %neg.46, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 46), align 8, !tbaa !2
+  %47 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 47), align 4, !tbaa !2
+  %neg.47 = xor i32 %47, -1
+  store i32 %neg.47, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 47), align 4, !tbaa !2
+  %48 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48), align 16, !tbaa !2
+  %neg.48 = xor i32 %48, -1
+  store i32 %neg.48, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48), align 16, !tbaa !2
+  %49 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 49), align 4, !tbaa !2
+  %neg.49 = xor i32 %49, -1
+  store i32 %neg.49, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 49), align 4, !tbaa !2
+  %50 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 50), align 8, !tbaa !2
+  %neg.50 = xor i32 %50, -1
+  store i32 %neg.50, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 50), align 8, !tbaa !2
+  %51 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 51), align 4, !tbaa !2
+  %neg.51 = xor i32 %51, -1
+  store i32 %neg.51, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 51), align 4, !tbaa !2
+  %52 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52), align 16, !tbaa !2
+  %neg.52 = xor i32 %52, -1
+  store i32 %neg.52, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52), align 16, !tbaa !2
+  %53 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 53), align 4, !tbaa !2
+  %neg.53 = xor i32 %53, -1
+  store i32 %neg.53, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 53), align 4, !tbaa !2
+  %54 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 54), align 8, !tbaa !2
+  %neg.54 = xor i32 %54, -1
+  store i32 %neg.54, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 54), align 8, !tbaa !2
+  %55 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 55), align 4, !tbaa !2
+  %neg.55 = xor i32 %55, -1
+  store i32 %neg.55, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 55), align 4, !tbaa !2
+  %56 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56), align 16, !tbaa !2
+  %neg.56 = xor i32 %56, -1
+  store i32 %neg.56, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56), align 16, !tbaa !2
+  %57 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 57), align 4, !tbaa !2
+  %neg.57 = xor i32 %57, -1
+  store i32 %neg.57, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 57), align 4, !tbaa !2
+  %58 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 58), align 8, !tbaa !2
+  %neg.58 = xor i32 %58, -1
+  store i32 %neg.58, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 58), align 8, !tbaa !2
+  %59 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 59), align 4, !tbaa !2
+  %neg.59 = xor i32 %59, -1
+  store i32 %neg.59, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 59), align 4, !tbaa !2
+  %60 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60), align 16, !tbaa !2
+  %neg.60 = xor i32 %60, -1
+  store i32 %neg.60, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60), align 16, !tbaa !2
+  %61 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 61), align 4, !tbaa !2
+  %neg.61 = xor i32 %61, -1
+  store i32 %neg.61, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 61), align 4, !tbaa !2
+  %62 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 62), align 8, !tbaa !2
+  %neg.62 = xor i32 %62, -1
+  store i32 %neg.62, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 62), align 8, !tbaa !2
+  %63 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 63), align 4, !tbaa !2
+  %neg.63 = xor i32 %63, -1
+  store i32 %neg.63, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 63), align 4, !tbaa !2
+  br label %for.body5
+
+; Scalar verification loop (rotated: body precedes its latch for.cond3);
+; checks ia[i] == ~ib[i] for i = 0..63 and aborts on the first mismatch.
+for.cond3:                                        ; preds = %for.body5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp4 = icmp ult i64 %indvars.iv, 63
+  br i1 %cmp4, label %for.body5, label %for.end14
+
+for.body5:                                        ; preds = %entry, %for.cond3
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond3 ]
+  %arrayidx7 = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 %indvars.iv
+  %64 = load i32, i32* %arrayidx7, align 4, !tbaa !2
+  %arrayidx9 = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 %indvars.iv
+  %65 = load i32, i32* %arrayidx9, align 4, !tbaa !2
+  %neg10 = xor i32 %65, -1
+  %cmp11 = icmp eq i32 %64, %neg10
+  br i1 %cmp11, label %for.cond3, label %if.then
+
+if.then:                                          ; preds = %for.body5
+  tail call void @abort() #2
+  unreachable
+
+for.end14:                                        ; preds = %for.cond3
+  ret i32 0
+}
+
+; External libc abort, invoked by @foo1's if.then block on verification failure.
+declare void @abort() #2
+
+; Module metadata: wchar width flag, compiler identification, and the "int"
+; TBAA access chain (!2 -> !3 -> !4 -> !5) attached to every scalar i32
+; load/store in the functions above.
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 6.0.0 (http://llvm.org/git/clang.git b376dd59b0bbde66fd68b589ce217154d16a87c2) (http://llvm.org/git/llvm.git 9f9634137275c12ddb1adcc9cb59e52db8064810)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}




More information about the llvm-commits mailing list