[llvm] 857bf30 - [Attributor] Remove broken and duplicated load simplification

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 5 18:49:30 PDT 2022


Author: Johannes Doerfert
Date: 2022-04-05T20:49:03-05:00
New Revision: 857bf306d7984cb464cd0de6371df7c934c6642a

URL: https://github.com/llvm/llvm-project/commit/857bf306d7984cb464cd0de6371df7c934c6642a
DIFF: https://github.com/llvm/llvm-project/commit/857bf306d7984cb464cd0de6371df7c934c6642a.diff

LOG: [Attributor] Remove broken and duplicated load simplification

We look through loads in the "generic value traversal" and we
consequently don't need to look through them again in AAValueSimplify*.
The test changes stem from the fact that we allowed any simplified
value, incl. non-dynamically unique ones, as long as the underlying
memory was an alloca. This doesn't seem to make sense as allocas do not
protect against dynamically non-unique values. We need to make the
unique check better rather than excluding allocas. That in mind, we can
remove a lot of code by simply relying on the generic value traversal
load look through.

To soften the blow some minor adjustments have been made that allow more
simplification through the now used scheme and some tests have been
given a `norecurse` for now.

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll
    llvm/test/Transforms/Attributor/callbacks.ll
    llvm/test/Transforms/Attributor/heap_to_stack.ll
    llvm/test/Transforms/Attributor/norecurse.ll
    llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
    llvm/test/Transforms/Attributor/value-simplify.ll
    llvm/test/Transforms/OpenMP/barrier_removal.ll
    llvm/test/Transforms/OpenMP/custom_state_machines.ll
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 489914b27ccae..40d6cc10a5624 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -214,7 +214,7 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
     return !C->isThreadDependent();
   // TODO: Inspect and cache more complex instructions.
   if (auto *CB = dyn_cast<CallBase>(&V))
-    return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() &&
+    return CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
            !CB->mayReadFromMemory();
   const Function *Scope = nullptr;
   if (auto *I = dyn_cast<Instruction>(&V))
@@ -388,15 +388,16 @@ getPotentialCopiesOfMemoryValue(Attributor &A, Ty &I,
     auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
       if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
         return true;
-      if (OnlyExact && !IsExact) {
+      if (IsLoad && Acc.isWrittenValueYetUndetermined())
+        return true;
+      if (OnlyExact && !IsExact &&
+          !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) {
         LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
                           << ", abort!\n");
         return false;
       }
       if (IsLoad) {
         assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
-        if (Acc.isWrittenValueYetUndetermined())
-          return true;
         if (!Acc.isWrittenValueUnknown()) {
           NewCopies.push_back(Acc.getWrittenValue());
           return true;

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b8c48648d24c8..4610ec7b90c88 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5389,76 +5389,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
     SimplifiedAssociatedValue = &getAssociatedValue();
     return AAValueSimplify::indicatePessimisticFixpoint();
   }
-
-  static bool handleLoad(Attributor &A, const AbstractAttribute &AA,
-                         LoadInst &L, function_ref<bool(Value &)> Union) {
-    auto UnionWrapper = [&](Value &V, Value &Obj) {
-      if (isa<AllocaInst>(Obj))
-        return Union(V);
-      if (!AA::isDynamicallyUnique(A, AA, V))
-        return false;
-      ValueToValueMapTy VMap;
-      if (!reproduceValue(A, AA, V, *L.getType(), &L, /* CheckOnly */ true,
-                          VMap))
-        return false;
-      return Union(V);
-    };
-
-    Value &Ptr = *L.getPointerOperand();
-    SmallVector<Value *, 8> Objects;
-    bool UsedAssumedInformation = false;
-    if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L,
-                                         UsedAssumedInformation))
-      return false;
-
-    const auto *TLI =
-        A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction());
-    for (Value *Obj : Objects) {
-      LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
-      if (isa<UndefValue>(Obj))
-        continue;
-      if (isa<ConstantPointerNull>(Obj)) {
-        // A null pointer access can be undefined but any offset from null may
-        // be OK. We do not try to optimize the latter.
-        if (!NullPointerIsDefined(L.getFunction(),
-                                  Ptr.getType()->getPointerAddressSpace()) &&
-            A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
-          continue;
-        return false;
-      }
-      Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI);
-      if (!InitialVal || !Union(*InitialVal))
-        return false;
-
-      LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
-                           "propagation, checking accesses next.\n");
-
-      auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
-        LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
-        if (Acc.isWrittenValueYetUndetermined())
-          return true;
-        Value *Content = Acc.getWrittenValue();
-        if (!Content)
-          return false;
-        Value *CastedContent =
-            AA::getWithType(*Content, *AA.getAssociatedType());
-        if (!CastedContent)
-          return false;
-        if (IsExact)
-          return UnionWrapper(*CastedContent, *Obj);
-        if (auto *C = dyn_cast<Constant>(CastedContent))
-          if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
-            return UnionWrapper(*CastedContent, *Obj);
-        return false;
-      };
-
-      auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
-                                           DepClassTy::REQUIRED);
-      if (!PI.forallInterferingAccesses(A, AA, L, CheckAccess))
-        return false;
-    }
-    return true;
-  }
 };
 
 struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
@@ -5685,15 +5615,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
     return true;
   }
 
-  bool updateWithLoad(Attributor &A, LoadInst &L) {
-    auto Union = [&](Value &V) {
-      SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
-          SimplifiedAssociatedValue, &V, L.getType());
-      return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
-    };
-    return handleLoad(A, *this, L, Union);
-  }
-
   /// Use the generic, non-optimistic InstSimplfy functionality if we managed to
   /// simplify any operand of the instruction \p I. Return true if successful,
   /// in that case SimplifiedAssociatedValue will be updated.
@@ -5757,9 +5678,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
       if (!Stripped && this == &AA) {
 
         if (auto *I = dyn_cast<Instruction>(&V)) {
-          if (auto *LI = dyn_cast<LoadInst>(&V))
-            if (updateWithLoad(A, *LI))
-              return true;
           if (auto *Cmp = dyn_cast<CmpInst>(&V))
             if (handleCmp(A, *Cmp))
               return true;
@@ -9253,30 +9171,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
                                          : ChangeStatus::CHANGED;
   }
 
-  ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) {
-    if (!L.getType()->isIntegerTy())
-      return indicatePessimisticFixpoint();
-
-    auto Union = [&](Value &V) {
-      if (isa<UndefValue>(V)) {
-        unionAssumedWithUndef();
-        return true;
-      }
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) {
-        unionAssumed(CI->getValue());
-        return true;
-      }
-      return false;
-    };
-    auto AssumedBefore = getAssumed();
-
-    if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union))
-      return indicatePessimisticFixpoint();
-
-    return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
-                                         : ChangeStatus::CHANGED;
-  }
-
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     Value &V = getAssociatedValue();
@@ -9297,9 +9191,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     if (auto *PHI = dyn_cast<PHINode>(I))
       return updateWithPHINode(A, PHI);
 
-    if (auto *L = dyn_cast<LoadInst>(I))
-      return updateWithLoad(A, *L);
-
     return indicatePessimisticFixpoint();
   }
 

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll
index 1670df65c9b9d..827ff2ad8a255 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll
@@ -17,22 +17,14 @@ define i32 @bar(i32 %arg) {
 ; IS________OPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[STACK]])
 ; IS________OPM-NEXT:    ret i32 [[CALL]]
 ;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar
-; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) {
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
-; IS__TUNIT_NPM-NEXT:    store i32 [[ARG]], i32* [[STACK]], align 4
-; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
-; IS__TUNIT_NPM-NEXT:    ret i32 [[CALL]]
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
-; IS__CGSCC_NPM-SAME: (i32 returned [[ARG:%.*]]) {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[ARG]], i32* [[STACK]], align 4
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[ARG]])
-; IS__CGSCC_NPM-NEXT:    ret i32 [[ARG]]
+; IS________NPM-LABEL: define {{[^@]+}}@bar
+; IS________NPM-SAME: (i32 [[ARG:%.*]]) {
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    store i32 [[ARG]], i32* [[STACK]], align 4
+; IS________NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
+; IS________NPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
+; IS________NPM-NEXT:    ret i32 [[CALL]]
 ;
 entry:
   %stack = alloca i32

diff  --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index ebbc003ec2035..aed46afdb62b4 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -85,7 +85,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________OPM-NEXT:    tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________OPM-NEXT:    tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t0_callback_callee
@@ -94,7 +94,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________NPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________NPM-NEXT:    tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________NPM-NEXT:    tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
 ; IS________NPM-NEXT:    ret void
 ;
 entry:
@@ -186,7 +186,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________OPM-NEXT:    tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________OPM-NEXT:    tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM: Function Attrs: nosync
@@ -196,7 +196,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________NPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________NPM-NEXT:    tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________NPM-NEXT:    tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________NPM-NEXT:    ret void
 ;
 entry:
@@ -287,7 +287,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________OPM-NEXT:    tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________OPM-NEXT:    tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t2_callback_callee
@@ -296,7 +296,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________NPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________NPM-NEXT:    tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________NPM-NEXT:    tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________NPM-NEXT:    ret void
 ;
 entry:
@@ -392,7 +392,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________OPM-NEXT:    tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________OPM-NEXT:    tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t3_callback_callee
@@ -401,7 +401,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
 ; IS________NPM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
-; IS________NPM-NEXT:    tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
+; IS________NPM-NEXT:    tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
 ; IS________NPM-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 24c7e337a3f21..4ee4efd6c1133 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
 ; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll
index dd096a32c147b..85f469d57554d 100644
--- a/llvm/test/Transforms/Attributor/norecurse.ll
+++ b/llvm/test/Transforms/Attributor/norecurse.ll
@@ -159,7 +159,8 @@ define void @f(i32 %x)  {
 ; NOT_CGSCC_NPM-NEXT:  entry:
 ; NOT_CGSCC_NPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
 ; NOT_CGSCC_NPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; NOT_CGSCC_NPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
+; NOT_CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
+; NOT_CGSCC_NPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
 ; NOT_CGSCC_NPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NOT_CGSCC_NPM:       if.then:
 ; NOT_CGSCC_NPM-NEXT:    br label [[IF_END]]
@@ -171,6 +172,7 @@ define void @f(i32 %x)  {
 ; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+; IS__CGSCC_NPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS__CGSCC_NPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC_NPM:       if.then:

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 2c8fe80f7bf78..8f96becbac22e 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -109,16 +109,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR12:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR13:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_OPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
 ; IS__TUNIT_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__TUNIT_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -136,7 +136,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_OPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__TUNIT_OPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR12]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR13]]
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
@@ -145,16 +145,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR10:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR12:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_NPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
 ; IS__TUNIT_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR11:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__TUNIT_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -172,7 +172,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_NPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__TUNIT_NPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
@@ -181,16 +181,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; IS__CGSCC_OPM-NEXT:    [[I:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR12:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR13:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__CGSCC_OPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
 ; IS__CGSCC_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__CGSCC_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__CGSCC_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -208,7 +208,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__CGSCC_OPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__CGSCC_OPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR12]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR13]]
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
@@ -217,16 +217,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[I:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR10:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR12:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__CGSCC_NPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
 ; IS__CGSCC_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__CGSCC_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR11:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__CGSCC_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -244,7 +244,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__CGSCC_NPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__CGSCC_NPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -337,10 +337,11 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_OPM:       for.body:
 ; IS__TUNIT_OPM-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[I15]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 2, !tbaa [[TBAA15:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_OPM:       for.inc:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_OPM:       for.cond2:
@@ -354,10 +355,11 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_OPM-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I18:%.*]] = or i64 [[I17]], 1
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[I16]], i64 [[I18]]
+; IS__TUNIT_OPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_OPM:       for.inc9:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end11:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_OPM:       for.cond13:
@@ -371,15 +373,17 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_OPM-NEXT:    [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, i64* [[I19]], i64 [[I21]]
+; IS__TUNIT_OPM-NEXT:    store i64 0, i64* [[ARRAYIDX21]], align 16, !tbaa [[TBAA20:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC22]]
 ; IS__TUNIT_OPM:       for.inc22:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end24:
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 1023
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500
 ; IS__TUNIT_OPM-NEXT:    [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32*
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR14:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR15:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND28:%.*]]
 ; IS__TUNIT_OPM:       for.cond28:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
@@ -388,12 +392,14 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_OPM:       for.cond.cleanup30:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_END38:%.*]]
 ; IS__TUNIT_OPM:       for.body31:
+; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
+; IS__TUNIT_OPM-NEXT:    [[I23:%.*]] = load i8, i8* [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA19:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    store i8 [[I23]], i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC36]]
 ; IS__TUNIT_OPM:       for.inc36:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end38:
 ; IS__TUNIT_OPM-NEXT:    [[I24:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
 ; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I24]])
@@ -405,7 +411,7 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__TUNIT_NPM:       for.cond:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
@@ -416,10 +422,11 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM:       for.body:
 ; IS__TUNIT_NPM-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[I15]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 2, !tbaa [[TBAA15:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_NPM:       for.inc:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_NPM:       for.cond2:
@@ -433,10 +440,11 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I18:%.*]] = or i64 [[I17]], 1
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[I16]], i64 [[I18]]
+; IS__TUNIT_NPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_NPM:       for.inc9:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end11:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_NPM:       for.cond13:
@@ -450,15 +458,17 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, i64* [[I19]], i64 [[I21]]
+; IS__TUNIT_NPM-NEXT:    store i64 0, i64* [[ARRAYIDX21]], align 16, !tbaa [[TBAA20:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC22]]
 ; IS__TUNIT_NPM:       for.inc22:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end24:
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 1023
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500
 ; IS__TUNIT_NPM-NEXT:    [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32*
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND28:%.*]]
 ; IS__TUNIT_NPM:       for.cond28:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
@@ -467,15 +477,17 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM:       for.cond.cleanup30:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_END38:%.*]]
 ; IS__TUNIT_NPM:       for.body31:
+; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
+; IS__TUNIT_NPM-NEXT:    [[I23:%.*]] = load i8, i8* [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA19:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    store i8 [[I23]], i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC36]]
 ; IS__TUNIT_NPM:       for.inc36:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end38:
 ; IS__TUNIT_NPM-NEXT:    [[I24:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I24]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I24]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2() {
@@ -539,7 +551,7 @@ define void @local_alloca_simplifiable_2() {
 ; IS__CGSCC_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; IS__CGSCC_OPM-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500
 ; IS__CGSCC_OPM-NEXT:    [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32*
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR14:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR15:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND28:%.*]]
 ; IS__CGSCC_OPM:       for.cond28:
 ; IS__CGSCC_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
@@ -565,7 +577,7 @@ define void @local_alloca_simplifiable_2() {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; IS__CGSCC_NPM-NEXT:    [[I:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__CGSCC_NPM:       for.cond:
 ; IS__CGSCC_NPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
@@ -622,7 +634,7 @@ define void @local_alloca_simplifiable_2() {
 ; IS__CGSCC_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; IS__CGSCC_NPM-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500
 ; IS__CGSCC_NPM-NEXT:    [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32*
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR12:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR14:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND28:%.*]]
 ; IS__CGSCC_NPM:       for.cond28:
 ; IS__CGSCC_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
@@ -819,7 +831,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]]
 ; IS__TUNIT_OPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__TUNIT_OPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__TUNIT_OPM:       cond.true:
@@ -828,7 +840,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_OPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_OPM:       cond.end:
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR12]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR13]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 5
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -837,7 +849,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__TUNIT_NPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__TUNIT_NPM:       cond.true:
@@ -846,7 +858,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_NPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_NPM:       cond.end:
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -855,7 +867,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__CGSCC_OPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]]
 ; IS__CGSCC_OPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__CGSCC_OPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__CGSCC_OPM:       cond.true:
@@ -864,7 +876,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__CGSCC_OPM-NEXT:    br label [[COND_END]]
 ; IS__CGSCC_OPM:       cond.end:
 ; IS__CGSCC_OPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR12]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR13]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -873,7 +885,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__CGSCC_NPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__CGSCC_NPM:       cond.true:
@@ -882,7 +894,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__CGSCC_NPM-NEXT:    br label [[COND_END]]
 ; IS__CGSCC_NPM:       cond.end:
 ; IS__CGSCC_NPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 5
 ;
 entry:
@@ -923,7 +935,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]]
 ; IS__TUNIT_OPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__TUNIT_OPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__TUNIT_OPM:       cond.true:
@@ -932,7 +944,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_OPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_OPM:       cond.end:
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
+; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR13]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 5
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -941,7 +953,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__TUNIT_NPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__TUNIT_NPM:       cond.true:
@@ -950,7 +962,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_NPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_NPM:       cond.end:
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR10]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -959,7 +971,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__CGSCC_OPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]]
 ; IS__CGSCC_OPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__CGSCC_OPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__CGSCC_OPM:       cond.true:
@@ -968,7 +980,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__CGSCC_OPM-NEXT:    br label [[COND_END]]
 ; IS__CGSCC_OPM:       cond.end:
 ; IS__CGSCC_OPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
+; IS__CGSCC_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR13]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn
@@ -977,7 +989,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    [[I:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; IS__CGSCC_NPM-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
 ; IS__CGSCC_NPM:       cond.true:
@@ -986,7 +998,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__CGSCC_NPM-NEXT:    br label [[COND_END]]
 ; IS__CGSCC_NPM:       cond.end:
 ; IS__CGSCC_NPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 5
 ;
 entry:
@@ -1034,9 +1046,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
 ; IS__TUNIT_OPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR6:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR13]]
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR13]]
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR14]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR14]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__TUNIT_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -1059,9 +1071,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
 ; IS__TUNIT_NPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR11]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR11]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR13]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR13]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__TUNIT_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -1084,9 +1096,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
 ; IS__CGSCC_OPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR13]]
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR13]]
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR14]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR14]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__CGSCC_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -1109,9 +1121,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
 ; IS__CGSCC_NPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR11]]
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR11]]
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
 ; IS__CGSCC_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
@@ -1180,9 +1192,9 @@ entry:
 ;    }
 ;
 define void @static_global_simplifiable_2() {
-; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
+; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@static_global_simplifiable_2
-; IS__TUNIT_OPM-SAME: () #[[ATTR7:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR3]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__TUNIT_OPM:       for.cond:
@@ -1194,10 +1206,11 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_OPM:       for.body:
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 [[I]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 2, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_OPM:       for.inc:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_OPM:       for.cond2:
@@ -1210,10 +1223,11 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_OPM-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I16:%.*]] = or i64 [[I15]], 1
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* bitcast ([1024 x i8]* @GBytes to float*), i64 [[I16]]
+; IS__TUNIT_OPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_OPM:       for.inc9:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end11:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_OPM:       for.cond13:
@@ -1226,12 +1240,14 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_OPM-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, i64* bitcast ([1024 x i8]* @GBytes to i64*), i64 [[I18]]
+; IS__TUNIT_OPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 16, !tbaa [[TBAA20]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC21]]
 ; IS__TUNIT_OPM:       for.inc21:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end23:
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR14]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND25:%.*]]
 ; IS__TUNIT_OPM:       for.cond25:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1240,18 +1256,20 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_OPM:       for.cond.cleanup27:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_END35:%.*]]
 ; IS__TUNIT_OPM:       for.body28:
+; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 [[INDVARS_IV12]]
+; IS__TUNIT_OPM-NEXT:    [[I19:%.*]] = load i8, i8* [[ARRAYIDX30]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_OPM-NEXT:    store i8 [[I19]], i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC33]]
 ; IS__TUNIT_OPM:       for.inc33:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end35:
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
-; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
+; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@static_global_simplifiable_2
-; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR3]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__TUNIT_NPM:       for.cond:
@@ -1263,10 +1281,11 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_NPM:       for.body:
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 [[I]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 2, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_NPM:       for.inc:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_NPM:       for.cond2:
@@ -1279,10 +1298,11 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I16:%.*]] = or i64 [[I15]], 1
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* bitcast ([1024 x i8]* @GBytes to float*), i64 [[I16]]
+; IS__TUNIT_NPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_NPM:       for.inc9:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end11:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_NPM:       for.cond13:
@@ -1295,12 +1315,14 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, i64* bitcast ([1024 x i8]* @GBytes to i64*), i64 [[I18]]
+; IS__TUNIT_NPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 16, !tbaa [[TBAA20]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC21]]
 ; IS__TUNIT_NPM:       for.inc21:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end23:
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND25:%.*]]
 ; IS__TUNIT_NPM:       for.cond25:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1309,12 +1331,14 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_NPM:       for.cond.cleanup27:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_END35:%.*]]
 ; IS__TUNIT_NPM:       for.body28:
+; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 [[INDVARS_IV12]]
+; IS__TUNIT_NPM-NEXT:    [[I19:%.*]] = load i8, i8* [[ARRAYIDX30]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_NPM-NEXT:    store i8 [[I19]], i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC33]]
 ; IS__TUNIT_NPM:       for.inc33:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end35:
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
@@ -1371,7 +1395,7 @@ define void @static_global_simplifiable_2() {
 ; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IS__CGSCC_OPM:       for.end23:
 ; IS__CGSCC_OPM-NEXT:    store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR14]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR15]]
 ; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND25:%.*]]
 ; IS__CGSCC_OPM:       for.cond25:
 ; IS__CGSCC_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1444,7 +1468,7 @@ define void @static_global_simplifiable_2() {
 ; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IS__CGSCC_NPM:       for.end23:
 ; IS__CGSCC_NPM-NEXT:    store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR12]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR14]]
 ; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND25:%.*]]
 ; IS__CGSCC_NPM:       for.cond25:
 ; IS__CGSCC_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1618,11 +1642,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__TUNIT_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]]
 ; IS__TUNIT_OPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1668,11 +1692,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__TUNIT_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1718,11 +1742,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__CGSCC_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__CGSCC_OPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__CGSCC_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]]
 ; IS__CGSCC_OPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__CGSCC_OPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1768,11 +1792,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__CGSCC_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__CGSCC_NPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__CGSCC_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR11]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]]
 ; IS__CGSCC_NPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__CGSCC_NPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1884,11 +1908,11 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_OPM:       for.body:
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 [[I]]
-; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_OPM:       for.inc:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_OPM:       for.cond2:
@@ -1902,11 +1926,11 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_OPM-NEXT:    [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I17:%.*]] = or i64 [[I16]], 1
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[I15]], i64 [[I17]]
-; IS__TUNIT_OPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA26:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_OPM:       for.inc9:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end11:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_OPM:       for.cond13:
@@ -1920,17 +1944,17 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_OPM-NEXT:    [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_OPM-NEXT:    [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, i64* [[I18]], i64 [[I20]]
-; IS__TUNIT_OPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 8, !tbaa [[TBAA28:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 8, !tbaa [[TBAA20]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC21]]
 ; IS__TUNIT_OPM:       for.inc21:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
 ; IS__TUNIT_OPM:       for.end23:
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 1023
-; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500
 ; IS__TUNIT_OPM-NEXT:    [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32*
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR14]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND27:%.*]]
 ; IS__TUNIT_OPM:       for.cond27:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1940,9 +1964,9 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_END37:%.*]]
 ; IS__TUNIT_OPM:       for.body30:
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    [[I22:%.*]] = load i8, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_OPM-NEXT:    [[I22:%.*]] = load i8, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    store i8 [[I22]], i8* [[ARRAYIDX34]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_OPM-NEXT:    store i8 [[I22]], i8* [[ARRAYIDX34]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC35]]
 ; IS__TUNIT_OPM:       for.inc35:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
@@ -1964,11 +1988,11 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_NPM:       for.body:
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 [[I]]
-; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC]]
 ; IS__TUNIT_NPM:       for.inc:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2:%.*]]
 ; IS__TUNIT_NPM:       for.cond2:
@@ -1982,11 +2006,11 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_NPM-NEXT:    [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I17:%.*]] = or i64 [[I16]], 1
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[I15]], i64 [[I17]]
-; IS__TUNIT_NPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA26:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC9]]
 ; IS__TUNIT_NPM:       for.inc9:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end11:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13:%.*]]
 ; IS__TUNIT_NPM:       for.cond13:
@@ -2000,17 +2024,17 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_NPM-NEXT:    [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; IS__TUNIT_NPM-NEXT:    [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, i64* [[I18]], i64 [[I20]]
-; IS__TUNIT_NPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 8, !tbaa [[TBAA28:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    store i64 0, i64* [[ARRAYIDX20]], align 8, !tbaa [[TBAA20]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC21]]
 ; IS__TUNIT_NPM:       for.inc21:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end23:
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 1023
-; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500
 ; IS__TUNIT_NPM-NEXT:    [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32*
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR11]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR13]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND27:%.*]]
 ; IS__TUNIT_NPM:       for.cond27:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -2020,9 +2044,9 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_END37:%.*]]
 ; IS__TUNIT_NPM:       for.body30:
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    [[I22:%.*]] = load i8, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_NPM-NEXT:    [[I22:%.*]] = load i8, i8* [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    store i8 [[I22]], i8* [[ARRAYIDX34]], align 1, !tbaa [[TBAA19]]
+; IS__TUNIT_NPM-NEXT:    store i8 [[I22]], i8* [[ARRAYIDX34]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC35]]
 ; IS__TUNIT_NPM:       for.inc35:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
@@ -2089,7 +2113,7 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__CGSCC_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; IS__CGSCC_OPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500
 ; IS__CGSCC_OPM-NEXT:    [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32*
-; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR14]]
+; IS__CGSCC_OPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR15]]
 ; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND27:%.*]]
 ; IS__CGSCC_OPM:       for.cond27:
 ; IS__CGSCC_OPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -2168,7 +2192,7 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__CGSCC_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; IS__CGSCC_NPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500
 ; IS__CGSCC_NPM-NEXT:    [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32*
-; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR12]]
+; IS__CGSCC_NPM-NEXT:    call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR14]]
 ; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND27:%.*]]
 ; IS__CGSCC_NPM:       for.cond27:
 ; IS__CGSCC_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -2301,9 +2325,9 @@ define i32 @local_alloca_not_simplifiable_1() {
 ; IS________OPM-NEXT:    [[X:%.*]] = alloca i32, align 4
 ; IS________OPM-NEXT:    [[Y:%.*]] = alloca i32, align 4
 ; IS________OPM-NEXT:    [[I:%.*]] = bitcast i32* [[X]] to i8*
-; IS________OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12:[0-9]+]]
+; IS________OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13:[0-9]+]]
 ; IS________OPM-NEXT:    [[I1:%.*]] = bitcast i32* [[Y]] to i8*
-; IS________OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
+; IS________OPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR13]]
 ; IS________OPM-NEXT:    store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]]
 ; IS________OPM-NEXT:    store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]]
 ; IS________OPM-NEXT:    [[I2:%.*]] = bitcast i32* [[X]] to i8*
@@ -2326,9 +2350,9 @@ define i32 @local_alloca_not_simplifiable_1() {
 ; IS________NPM-NEXT:    [[X:%.*]] = alloca i32, align 4
 ; IS________NPM-NEXT:    [[Y:%.*]] = alloca i32, align 4
 ; IS________NPM-NEXT:    [[I:%.*]] = bitcast i32* [[X]] to i8*
-; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10:[0-9]+]]
+; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12:[0-9]+]]
 ; IS________NPM-NEXT:    [[I1:%.*]] = bitcast i32* [[Y]] to i8*
-; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR10]]
+; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]]
 ; IS________NPM-NEXT:    store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]]
 ; IS________NPM-NEXT:    store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]]
 ; IS________NPM-NEXT:    [[I2:%.*]] = bitcast i32* [[X]] to i8*
@@ -2513,7 +2537,7 @@ declare void @escape(i8*)
 define i32 @global_not_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; IS__TUNIT_OPM-SAME: (i32 [[CND:%.*]]) #[[ATTR8:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[I]]
@@ -2651,7 +2675,7 @@ define void @write_global() {
 define i32 @read_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@read_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR8]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR7]] {
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gint2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
 ;
@@ -2710,7 +2734,7 @@ define void @write_static_global() {
 define i32 @read_static_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@read_static_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR8]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR7]] {
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
 ;
@@ -2791,7 +2815,7 @@ define i32 @single_read_of_static_global() {
 define i8 @phi_store() {
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@phi_store
-; IS__TUNIT_OPM-SAME: () #[[ATTR9:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR8:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = alloca i16, align 2
 ; IS__TUNIT_OPM-NEXT:    [[B:%.*]] = bitcast i16* [[A]] to i8*
@@ -2885,6 +2909,7 @@ end:
 
 ; FIXME: This function returns 1.
 define i8 @phi_no_store_1() {
+;
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@phi_no_store_1
 ; IS__TUNIT_OPM-SAME: () #[[ATTR3]] {
@@ -2979,6 +3004,7 @@ end:
 
 ; FIXME: This function returns 1.
 define i8 @phi_no_store_2() {
+;
 ; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@phi_no_store_2
 ; IS__TUNIT_OPM-SAME: () #[[ATTR3]] {
@@ -3068,19 +3094,19 @@ end:
 }
 
 define i8 @phi_no_store_3() {
-; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@phi_no_store_3
-; IS__TUNIT_OPM-SAME: () #[[ATTR7]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    br label [[LOOP:%.*]]
-; IS__TUNIT_OPM:       loop:
-; IS__TUNIT_OPM-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ getelementptr (i8, i8* bitcast (i32* @a3 to i8*), i64 2), [[LOOP]] ]
-; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
-; IS__TUNIT_OPM-NEXT:    [[O]] = add nsw i8 [[I]], 1
-; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; IS__TUNIT_OPM:       end:
-; IS__TUNIT_OPM-NEXT:    ret i8 1
+; IS________OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
+; IS________OPM-LABEL: define {{[^@]+}}@phi_no_store_3
+; IS________OPM-SAME: () #[[ATTR9:[0-9]+]] {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    br label [[LOOP:%.*]]
+; IS________OPM:       loop:
+; IS________OPM-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ getelementptr (i8, i8* bitcast (i32* @a3 to i8*), i64 2), [[LOOP]] ]
+; IS________OPM-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; IS________OPM-NEXT:    [[O]] = add nsw i8 [[I]], 1
+; IS________OPM-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
+; IS________OPM-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; IS________OPM:       end:
+; IS________OPM-NEXT:    ret i8 1
 ;
 ; IS________NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS________NPM-LABEL: define {{[^@]+}}@phi_no_store_3
@@ -3096,20 +3122,6 @@ define i8 @phi_no_store_3() {
 ; IS________NPM:       end:
 ; IS________NPM-NEXT:    ret i8 1
 ;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@phi_no_store_3
-; IS__CGSCC_OPM-SAME: () #[[ATTR9:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    br label [[LOOP:%.*]]
-; IS__CGSCC_OPM:       loop:
-; IS__CGSCC_OPM-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ getelementptr (i8, i8* bitcast (i32* @a3 to i8*), i64 2), [[LOOP]] ]
-; IS__CGSCC_OPM-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
-; IS__CGSCC_OPM-NEXT:    [[O]] = add nsw i8 [[I]], 1
-; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
-; IS__CGSCC_OPM:       end:
-; IS__CGSCC_OPM-NEXT:    ret i8 1
-;
 entry:
   %b = bitcast i32* @a3 to i8*
   %s30 = getelementptr i8, i8* %b, i64 3
@@ -3189,35 +3201,36 @@ define i64 @cast_and_load_2() {
 }
 
 define void @recursive_load_store(i64 %N, i32 %v) {
-; NOT_CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@recursive_load_store
-; NOT_CGSCC_OPM-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
-; NOT_CGSCC_OPM-NEXT:  entry:
-; NOT_CGSCC_OPM-NEXT:    br label [[FOR_COND:%.*]]
-; NOT_CGSCC_OPM:       for.cond:
-; NOT_CGSCC_OPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; NOT_CGSCC_OPM-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; NOT_CGSCC_OPM-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; NOT_CGSCC_OPM:       for.body:
-; NOT_CGSCC_OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; NOT_CGSCC_OPM-NEXT:    br label [[FOR_COND]]
-; NOT_CGSCC_OPM:       for.end:
-; NOT_CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@recursive_load_store
-; IS__CGSCC_OPM-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR9]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND:%.*]]
-; IS__CGSCC_OPM:       for.cond:
-; IS__CGSCC_OPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; IS__CGSCC_OPM-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; IS__CGSCC_OPM-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; IS__CGSCC_OPM:       for.body:
-; IS__CGSCC_OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND]]
-; IS__CGSCC_OPM:       for.end:
-; IS__CGSCC_OPM-NEXT:    ret void
+;
+; IS________OPM: Function Attrs: nofree norecurse nosync nounwind writeonly
+; IS________OPM-LABEL: define {{[^@]+}}@recursive_load_store
+; IS________OPM-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR9]] {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    br label [[FOR_COND:%.*]]
+; IS________OPM:       for.cond:
+; IS________OPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; IS________OPM-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
+; IS________OPM-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; IS________OPM:       for.body:
+; IS________OPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; IS________OPM-NEXT:    br label [[FOR_COND]]
+; IS________OPM:       for.end:
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM: Function Attrs: nofree norecurse nosync nounwind writeonly
+; IS________NPM-LABEL: define {{[^@]+}}@recursive_load_store
+; IS________NPM-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    br label [[FOR_COND:%.*]]
+; IS________NPM:       for.cond:
+; IS________NPM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; IS________NPM-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
+; IS________NPM-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; IS________NPM:       for.body:
+; IS________NPM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; IS________NPM-NEXT:    br label [[FOR_COND]]
+; IS________NPM:       for.end:
+; IS________NPM-NEXT:    ret void
 ;
 entry:
   store i32 %v, i32* @rec_storage
@@ -3244,11 +3257,11 @@ define dso_local i32 @round_trip_malloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_malloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR15:[0-9]+]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR16:[0-9]+]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    store i32 [[X]], i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; IS________OPM-NEXT:    call void @free(i8* noundef [[CALL]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* noundef [[CALL]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP1]]
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@round_trip_malloc
@@ -3273,11 +3286,11 @@ entry:
 define dso_local i32 @round_trip_malloc_constant() {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant() {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    store i32 7, i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; IS________OPM-NEXT:    call void @free(i8* noundef [[CALL]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* noundef [[CALL]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP1]]
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant() {
@@ -3302,7 +3315,7 @@ define dso_local i32 @conditional_malloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@conditional_malloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS________OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
@@ -3346,7 +3359,7 @@ define dso_local i32 @round_trip_calloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_calloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    store i32 [[X]], i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
@@ -3373,7 +3386,7 @@ entry:
 define dso_local i32 @round_trip_calloc_constant() {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_calloc_constant() {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    store i32 11, i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
@@ -3402,7 +3415,7 @@ define dso_local i32 @conditional_calloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@conditional_calloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS________OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
@@ -3412,7 +3425,7 @@ define dso_local i32 @conditional_calloc(i32 %x) {
 ; IS________OPM:       if.end:
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* [[TMP2]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* [[TMP2]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP1]]
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@conditional_calloc
@@ -3452,7 +3465,7 @@ define dso_local i32 @conditional_calloc_zero(i1 %c) {
 ; IS________OPM-LABEL: define {{[^@]+}}@conditional_calloc_zero
 ; IS________OPM-SAME: (i1 [[C:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; IS________OPM:       if.then:
@@ -3461,7 +3474,7 @@ define dso_local i32 @conditional_calloc_zero(i1 %c) {
 ; IS________OPM:       if.end:
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
 ; IS________OPM-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* [[TMP2]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* [[TMP2]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP1]]
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@conditional_calloc_zero
@@ -3498,7 +3511,7 @@ define dso_local i32* @malloc_like(i32 %s) {
 ; IS________OPM-SAME: (i32 [[S:%.*]]) {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[CONV:%.*]] = sext i32 [[S]] to i64
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS________OPM-NEXT:    ret i32* [[TMP0]]
 ;
@@ -3506,7 +3519,7 @@ define dso_local i32* @malloc_like(i32 %s) {
 ; IS__TUNIT_NPM-SAME: (i32 [[S:%.*]]) {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[CONV:%.*]] = sext i32 [[S]] to i64
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR12:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR14:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS__TUNIT_NPM-NEXT:    ret i32* [[TMP0]]
 ;
@@ -3514,7 +3527,7 @@ define dso_local i32* @malloc_like(i32 %s) {
 ; IS__CGSCC_NPM-SAME: (i32 [[S:%.*]]) {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[CONV:%.*]] = sext i32 [[S]] to i64
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR13:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR15:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; IS__CGSCC_NPM-NEXT:    ret i32* [[TMP0]]
 ;
@@ -3529,31 +3542,31 @@ define dso_local i32 @round_trip_malloc_like(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_malloc_like
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_like
 ; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_like
 ; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
@@ -3569,31 +3582,31 @@ define dso_local i32 @round_trip_unknown_alloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
 ; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
 ; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    store i32 [[X]], i32* [[CALL]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    call void @free(i8* noundef [[TMP1]]) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
@@ -3611,7 +3624,7 @@ define dso_local i32 @conditional_unknown_alloc(i32 %x) {
 ; IS________OPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc
 ; IS________OPM-SAME: (i32 [[X:%.*]]) {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR15]]
+; IS________OPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR16]]
 ; IS________OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS________OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; IS________OPM:       if.then:
@@ -3620,13 +3633,13 @@ define dso_local i32 @conditional_unknown_alloc(i32 %x) {
 ; IS________OPM:       if.end:
 ; IS________OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR15]]
+; IS________OPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR16]]
 ; IS________OPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc
 ; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS__TUNIT_NPM-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; IS__TUNIT_NPM:       if.then:
@@ -3635,13 +3648,13 @@ define dso_local i32 @conditional_unknown_alloc(i32 %x) {
 ; IS__TUNIT_NPM:       if.end:
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR12]]
+; IS__TUNIT_NPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR14]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc
 ; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS__CGSCC_NPM-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; IS__CGSCC_NPM:       if.then:
@@ -3650,7 +3663,7 @@ define dso_local i32 @conditional_unknown_alloc(i32 %x) {
 ; IS__CGSCC_NPM:       if.end:
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR13]]
+; IS__CGSCC_NPM-NEXT:    call void @free(i8* [[TMP1]]) #[[ATTR15]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
@@ -3688,7 +3701,7 @@ define dso_local void @test_nested_memory(float* %dst, double* %src) {
 ; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC2]] to double**
 ; IS__TUNIT_OPM-NEXT:    store double* [[SRC]], double** [[TMP1]], align 8
 ; IS__TUNIT_OPM-NEXT:    store i8* [[CALL]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
-; IS__TUNIT_OPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR16:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR17:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_nested_memory
@@ -3704,7 +3717,7 @@ define dso_local void @test_nested_memory(float* %dst, double* %src) {
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC2]] to double**
 ; IS__TUNIT_NPM-NEXT:    store double* [[SRC]], double** [[TMP2]], align 8
 ; IS__TUNIT_NPM-NEXT:    store i8* [[TMP1]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
-; IS__TUNIT_NPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR13:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR15:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_nested_memory
@@ -3720,7 +3733,7 @@ define dso_local void @test_nested_memory(float* %dst, double* %src) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC2]] to double**
 ; IS__CGSCC_OPM-NEXT:    store double* [[SRC]], double** [[TMP1]], align 8
 ; IS__CGSCC_OPM-NEXT:    store i8* [[CALL]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
-; IS__CGSCC_OPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR14]]
+; IS__CGSCC_OPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR15]]
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test_nested_memory
@@ -3736,7 +3749,7 @@ define dso_local void @test_nested_memory(float* %dst, double* %src) {
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC2]] to double**
 ; IS__CGSCC_NPM-NEXT:    store double* [[SRC]], double** [[TMP2]], align 8
 ; IS__CGSCC_NPM-NEXT:    store i8* [[TMP1]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
-; IS__CGSCC_NPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR12]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @nested_memory_callee() #[[ATTR14]]
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -3851,7 +3864,7 @@ define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out,
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[BUF:%.*]] = alloca [128 x i32], align 16
 ; IS________NPM-NEXT:    [[TMP0:%.*]] = bitcast [128 x i32]* [[BUF]] to i8*
-; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 512, i8* nocapture nofree noundef nonnull align 16 dereferenceable(512) [[TMP0]]) #[[ATTR10]]
+; IS________NPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 512, i8* nocapture nofree noundef nonnull align 16 dereferenceable(512) [[TMP0]]) #[[ATTR12]]
 ; IS________NPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS________NPM:       for.cond:
 ; IS________NPM-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -3872,7 +3885,7 @@ define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out,
 ; IS________NPM-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128
 ; IS________NPM-NEXT:    br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]]
 ; IS________NPM:       for.cond.cleanup6:
-; IS________NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 512, i8* nocapture nofree noundef nonnull align 16 dereferenceable(512) [[TMP0]]) #[[ATTR10]]
+; IS________NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 512, i8* nocapture nofree noundef nonnull align 16 dereferenceable(512) [[TMP0]]) #[[ATTR12]]
 ; IS________NPM-NEXT:    ret void
 ; IS________NPM:       for.body7:
 ; IS________NPM-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64
@@ -3935,6 +3948,92 @@ for.body7:                                        ; preds = %for.cond4
   br label %for.cond4, !llvm.loop !12
 }
 
+; Ensure we do not return true.
+define internal i1 @alloca_non_unique(i32* %p, i32 %in, i1 %c) {
+; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@alloca_non_unique
+; IS__TUNIT_OPM-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] {
+; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = alloca i32, align 4
+; IS__TUNIT_OPM-NEXT:    store i32 [[IN]], i32* [[A]], align 4
+; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS__TUNIT_OPM:       t:
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR18:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    ret i1 [[R]]
+; IS__TUNIT_OPM:       f:
+; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* [[P]], align 4
+; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
+; IS__TUNIT_OPM-NEXT:    ret i1 [[CMP]]
+;
+; IS________NPM: Function Attrs: argmemonly nofree nosync nounwind
+; IS________NPM-LABEL: define {{[^@]+}}@alloca_non_unique
+; IS________NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] {
+; IS________NPM-NEXT:    [[A:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    store i32 [[IN]], i32* [[A]], align 4
+; IS________NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS________NPM:       t:
+; IS________NPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR16:[0-9]+]]
+; IS________NPM-NEXT:    ret i1 [[R]]
+; IS________NPM:       f:
+; IS________NPM-NEXT:    [[L:%.*]] = load i32, i32* [[P]], align 4
+; IS________NPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
+; IS________NPM-NEXT:    ret i1 [[CMP]]
+;
+; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@alloca_non_unique
+; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] {
+; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = alloca i32, align 4
+; IS__CGSCC_OPM-NEXT:    store i32 [[IN]], i32* [[A]], align 4
+; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS__CGSCC_OPM:       t:
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR17:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    ret i1 [[R]]
+; IS__CGSCC_OPM:       f:
+; IS__CGSCC_OPM-NEXT:    [[L:%.*]] = load i32, i32* [[P]], align 4
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
+; IS__CGSCC_OPM-NEXT:    ret i1 [[CMP]]
+;
+  %a = alloca i32
+  store i32 %in, i32* %a
+  br i1 %c, label %t, label %f
+t:
+  %r = call i1 @alloca_non_unique(i32* %a, i32 42, i1 false)
+  ret i1 %r
+f:
+  %l = load i32, i32* %p
+  %cmp = icmp eq i32 %in, %l
+  ret i1 %cmp
+}
+
+; Ensure we do not return true.
+define i1 @alloca_non_unique_caller(i32 %in, i1 %c) {
+; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; IS__TUNIT_OPM-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR18]]
+; IS__TUNIT_OPM-NEXT:    ret i1 [[R]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; IS__TUNIT_NPM-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR16]]
+; IS__TUNIT_NPM-NEXT:    ret i1 [[R]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; IS__CGSCC_OPM-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR7]] {
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR15]]
+; IS__CGSCC_OPM-NEXT:    ret i1 [[R]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; IS__CGSCC_NPM-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR14]]
+; IS__CGSCC_NPM-NEXT:    ret i1 [[R]]
+;
+  %r = call i1 @alloca_non_unique(i32* undef, i32 %in, i1 %c)
+  ret i1 %r
+}
+
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}
 
@@ -3978,16 +4077,18 @@ for.body7:                                        ; preds = %for.cond4
 ; IS__TUNIT_OPM: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn }
+; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind writeonly }
 ; IS__TUNIT_OPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn uwtable }
 ; IS__TUNIT_OPM: attributes #[[ATTR11]] = { argmemonly nofree norecurse nosync nounwind }
-; IS__TUNIT_OPM: attributes #[[ATTR12]] = { willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR14]] = { nofree nosync nounwind writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR15]] = { norecurse }
-; IS__TUNIT_OPM: attributes #[[ATTR16]] = { nounwind }
+; IS__TUNIT_OPM: attributes #[[ATTR12]] = { argmemonly nofree nosync nounwind }
+; IS__TUNIT_OPM: attributes #[[ATTR13]] = { willreturn }
+; IS__TUNIT_OPM: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR15]] = { nofree nosync nounwind writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR16]] = { norecurse }
+; IS__TUNIT_OPM: attributes #[[ATTR17]] = { nounwind }
+; IS__TUNIT_OPM: attributes #[[ATTR18]] = { nofree nosync nounwind }
 ;.
 ; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__TUNIT_NPM: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn }
@@ -3998,11 +4099,14 @@ for.body7:                                        ; preds = %for.cond4
 ; IS__TUNIT_NPM: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind writeonly }
 ; IS__TUNIT_NPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn uwtable }
-; IS__TUNIT_NPM: attributes #[[ATTR9:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
-; IS__TUNIT_NPM: attributes #[[ATTR10]] = { willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_NPM: attributes #[[ATTR12]] = { norecurse }
-; IS__TUNIT_NPM: attributes #[[ATTR13]] = { nounwind }
+; IS__TUNIT_NPM: attributes #[[ATTR9]] = { argmemonly nofree nosync nounwind }
+; IS__TUNIT_NPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone }
+; IS__TUNIT_NPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR12]] = { willreturn }
+; IS__TUNIT_NPM: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR14]] = { norecurse }
+; IS__TUNIT_NPM: attributes #[[ATTR15]] = { nounwind }
+; IS__TUNIT_NPM: attributes #[[ATTR16]] = { nofree nosync nounwind }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn }
@@ -4016,10 +4120,12 @@ for.body7:                                        ; preds = %for.cond4
 ; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind writeonly }
 ; IS__CGSCC_OPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn uwtable }
 ; IS__CGSCC_OPM: attributes #[[ATTR11]] = { argmemonly nofree norecurse nosync nounwind }
-; IS__CGSCC_OPM: attributes #[[ATTR12]] = { willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR13]] = { nounwind willreturn writeonly }
-; IS__CGSCC_OPM: attributes #[[ATTR14]] = { nounwind }
-; IS__CGSCC_OPM: attributes #[[ATTR15]] = { norecurse }
+; IS__CGSCC_OPM: attributes #[[ATTR12]] = { argmemonly nofree nosync nounwind }
+; IS__CGSCC_OPM: attributes #[[ATTR13]] = { willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR14]] = { nounwind willreturn writeonly }
+; IS__CGSCC_OPM: attributes #[[ATTR15]] = { nounwind }
+; IS__CGSCC_OPM: attributes #[[ATTR16]] = { norecurse }
+; IS__CGSCC_OPM: attributes #[[ATTR17]] = { nofree nosync nounwind }
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn }
@@ -4030,75 +4136,45 @@ for.body7:                                        ; preds = %for.cond4
 ; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind writeonly }
 ; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn uwtable }
-; IS__CGSCC_NPM: attributes #[[ATTR9:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
-; IS__CGSCC_NPM: attributes #[[ATTR10]] = { willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR11]] = { nounwind willreturn writeonly }
-; IS__CGSCC_NPM: attributes #[[ATTR12]] = { nounwind }
-; IS__CGSCC_NPM: attributes #[[ATTR13]] = { norecurse }
-;.
-; IS__TUNIT____: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; IS__TUNIT____: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
-; IS__TUNIT____: [[META2:![0-9]+]] = !{!"clang version 13.0.0"}
-; IS__TUNIT____: [[TBAA3]] = !{!4, !4, i64 0}
-; IS__TUNIT____: [[META4:![0-9]+]] = !{!"int", !5, i64 0}
-; IS__TUNIT____: [[META5:![0-9]+]] = !{!"omnipotent char", !6, i64 0}
-; IS__TUNIT____: [[META6:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; IS__TUNIT____: [[META7:![0-9]+]] = !{!8, !9, i64 12}
-; IS__TUNIT____: [[META8:![0-9]+]] = !{!"S", !4, i64 0, !4, i64 4, !4, i64 8, !9, i64 12, !9, i64 16, !9, i64 20}
-; IS__TUNIT____: [[META9:![0-9]+]] = !{!"float", !5, i64 0}
-; IS__TUNIT____: [[META10:![0-9]+]] = !{!8, !9, i64 16}
-; IS__TUNIT____: [[META11:![0-9]+]] = !{!8, !9, i64 20}
-; IS__TUNIT____: [[META12:![0-9]+]] = !{!8, !4, i64 0}
-; IS__TUNIT____: [[META13:![0-9]+]] = !{!8, !4, i64 4}
-; IS__TUNIT____: [[META14:![0-9]+]] = !{!8, !4, i64 8}
-; IS__TUNIT____: [[META15:![0-9]+]] = distinct !{!15, !16}
-; IS__TUNIT____: [[META16:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; IS__TUNIT____: [[META17:![0-9]+]] = distinct !{!17, !16}
-; IS__TUNIT____: [[META18:![0-9]+]] = distinct !{!18, !16}
-; IS__TUNIT____: [[META19:![0-9]+]] = !{!5, !5, i64 0}
-; IS__TUNIT____: [[META20:![0-9]+]] = distinct !{!20, !16}
-; IS__TUNIT____: [[META21:![0-9]+]] = distinct !{!21, !16}
-; IS__TUNIT____: [[META22:![0-9]+]] = distinct !{!22, !16}
-; IS__TUNIT____: [[META23:![0-9]+]] = distinct !{!23, !16}
-; IS__TUNIT____: [[META24:![0-9]+]] = distinct !{!24, !16}
-; IS__TUNIT____: [[META25:![0-9]+]] = distinct !{!25, !16}
-; IS__TUNIT____: [[META26:![0-9]+]] = !{!9, !9, i64 0}
-; IS__TUNIT____: [[META27:![0-9]+]] = distinct !{!27, !16}
-; IS__TUNIT____: [[META28:![0-9]+]] = !{!29, !29, i64 0}
-; IS__TUNIT____: [[META29:![0-9]+]] = !{!"long long", !5, i64 0}
-; IS__TUNIT____: [[META30:![0-9]+]] = distinct !{!30, !16}
-; IS__TUNIT____: [[META31:![0-9]+]] = distinct !{!31, !16}
+; IS__CGSCC_NPM: attributes #[[ATTR9]] = { argmemonly nofree nosync nounwind }
+; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC_NPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__CGSCC_NPM: attributes #[[ATTR12]] = { willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR13]] = { nounwind willreturn writeonly }
+; IS__CGSCC_NPM: attributes #[[ATTR14]] = { nounwind }
+; IS__CGSCC_NPM: attributes #[[ATTR15]] = { norecurse }
+; IS__CGSCC_NPM: attributes #[[ATTR16]] = { nofree nosync nounwind }
 ;.
-; IS__CGSCC____: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; IS__CGSCC____: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
-; IS__CGSCC____: [[META2:![0-9]+]] = !{!"clang version 13.0.0"}
-; IS__CGSCC____: [[TBAA3]] = !{!4, !4, i64 0}
-; IS__CGSCC____: [[META4:![0-9]+]] = !{!"int", !5, i64 0}
-; IS__CGSCC____: [[META5:![0-9]+]] = !{!"omnipotent char", !6, i64 0}
-; IS__CGSCC____: [[META6:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; IS__CGSCC____: [[META7:![0-9]+]] = !{!8, !9, i64 12}
-; IS__CGSCC____: [[META8:![0-9]+]] = !{!"S", !4, i64 0, !4, i64 4, !4, i64 8, !9, i64 12, !9, i64 16, !9, i64 20}
-; IS__CGSCC____: [[META9:![0-9]+]] = !{!"float", !5, i64 0}
-; IS__CGSCC____: [[META10:![0-9]+]] = !{!8, !9, i64 16}
-; IS__CGSCC____: [[META11:![0-9]+]] = !{!8, !9, i64 20}
-; IS__CGSCC____: [[META12:![0-9]+]] = !{!8, !4, i64 0}
-; IS__CGSCC____: [[META13:![0-9]+]] = !{!8, !4, i64 4}
-; IS__CGSCC____: [[META14:![0-9]+]] = !{!8, !4, i64 8}
-; IS__CGSCC____: [[META15:![0-9]+]] = !{!5, !5, i64 0}
-; IS__CGSCC____: [[META16:![0-9]+]] = distinct !{!16, !17}
-; IS__CGSCC____: [[META17:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; IS__CGSCC____: [[META18:![0-9]+]] = !{!9, !9, i64 0}
-; IS__CGSCC____: [[META19:![0-9]+]] = distinct !{!19, !17}
-; IS__CGSCC____: [[META20:![0-9]+]] = !{!21, !21, i64 0}
-; IS__CGSCC____: [[META21:![0-9]+]] = !{!"long long", !5, i64 0}
-; IS__CGSCC____: [[META22:![0-9]+]] = distinct !{!22, !17}
-; IS__CGSCC____: [[META23:![0-9]+]] = distinct !{!23, !17}
-; IS__CGSCC____: [[META24:![0-9]+]] = distinct !{!24, !17}
-; IS__CGSCC____: [[META25:![0-9]+]] = distinct !{!25, !17}
-; IS__CGSCC____: [[META26:![0-9]+]] = distinct !{!26, !17}
-; IS__CGSCC____: [[META27:![0-9]+]] = distinct !{!27, !17}
-; IS__CGSCC____: [[META28:![0-9]+]] = distinct !{!28, !17}
-; IS__CGSCC____: [[META29:![0-9]+]] = distinct !{!29, !17}
-; IS__CGSCC____: [[META30:![0-9]+]] = distinct !{!30, !17}
-; IS__CGSCC____: [[META31:![0-9]+]] = distinct !{!31, !17}
+; CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{!"clang version 13.0.0"}
+; CHECK: [[TBAA3]] = !{!4, !4, i64 0}
+; CHECK: [[META4:![0-9]+]] = !{!"int", !5, i64 0}
+; CHECK: [[META5:![0-9]+]] = !{!"omnipotent char", !6, i64 0}
+; CHECK: [[META6:![0-9]+]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META7:![0-9]+]] = !{!8, !9, i64 12}
+; CHECK: [[META8:![0-9]+]] = !{!"S", !4, i64 0, !4, i64 4, !4, i64 8, !9, i64 12, !9, i64 16, !9, i64 20}
+; CHECK: [[META9:![0-9]+]] = !{!"float", !5, i64 0}
+; CHECK: [[META10:![0-9]+]] = !{!8, !9, i64 16}
+; CHECK: [[META11:![0-9]+]] = !{!8, !9, i64 20}
+; CHECK: [[META12:![0-9]+]] = !{!8, !4, i64 0}
+; CHECK: [[META13:![0-9]+]] = !{!8, !4, i64 4}
+; CHECK: [[META14:![0-9]+]] = !{!8, !4, i64 8}
+; CHECK: [[META15:![0-9]+]] = !{!5, !5, i64 0}
+; CHECK: [[META16:![0-9]+]] = distinct !{!16, !17}
+; CHECK: [[META17:![0-9]+]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META18:![0-9]+]] = !{!9, !9, i64 0}
+; CHECK: [[META19:![0-9]+]] = distinct !{!19, !17}
+; CHECK: [[META20:![0-9]+]] = !{!21, !21, i64 0}
+; CHECK: [[META21:![0-9]+]] = !{!"long long", !5, i64 0}
+; CHECK: [[META22:![0-9]+]] = distinct !{!22, !17}
+; CHECK: [[META23:![0-9]+]] = distinct !{!23, !17}
+; CHECK: [[META24:![0-9]+]] = distinct !{!24, !17}
+; CHECK: [[META25:![0-9]+]] = distinct !{!25, !17}
+; CHECK: [[META26:![0-9]+]] = distinct !{!26, !17}
+; CHECK: [[META27:![0-9]+]] = distinct !{!27, !17}
+; CHECK: [[META28:![0-9]+]] = distinct !{!28, !17}
+; CHECK: [[META29:![0-9]+]] = distinct !{!29, !17}
+; CHECK: [[META30:![0-9]+]] = distinct !{!30, !17}
+; CHECK: [[META31:![0-9]+]] = distinct !{!31, !17}
 ;.

diff  --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 772235456ba39..5e042c832e22f 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -570,7 +570,7 @@ define i8* @complicated_args_byval2() {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_byval2() {
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT_X:%.*]], %struct.X* @S, i32 0, i32 0), align 8
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i8* @test_byval2(i8* noalias nofree readnone "no-capture-maybe-returned" [[TMP1]])
-; IS__CGSCC_NPM-NEXT:    ret i8* [[TMP1]]
+; IS__CGSCC_NPM-NEXT:    ret i8* [[C]]
 ;
   %c = call i8* @test_byval2(%struct.X* byval(%struct.X) @S)
   ret i8* %c
@@ -1247,8 +1247,8 @@ define i8 @memcpy_uses_store_caller(i8 %arg) {
 
 declare i32 @speculatable() speculatable readnone
 
-define i32 @test_speculatable_expr() {
-; IS__TUNIT_OPM: Function Attrs: nosync readnone
+define i32 @test_speculatable_expr() norecurse {
+; IS__TUNIT_OPM: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_speculatable_expr
 ; IS__TUNIT_OPM-SAME: () #[[ATTR8:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
@@ -1258,7 +1258,7 @@ define i32 @test_speculatable_expr() {
 ; IS__TUNIT_OPM-NEXT:    [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[STACK]]) #[[ATTR14:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[RSPEC]]
 ;
-; IS__TUNIT_NPM: Function Attrs: nosync readnone
+; IS__TUNIT_NPM: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_speculatable_expr
 ; IS__TUNIT_NPM-SAME: () #[[ATTR7:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
@@ -1269,7 +1269,7 @@ define i32 @test_speculatable_expr() {
 ; IS__TUNIT_NPM-NEXT:    [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 [[TMP1]]) #[[ATTR13:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[RSPEC]]
 ;
-; IS__CGSCC_OPM: Function Attrs: nosync readnone
+; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_speculatable_expr
 ; IS__CGSCC_OPM-SAME: () #[[ATTR9:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
@@ -1278,7 +1278,7 @@ define i32 @test_speculatable_expr() {
 ; IS__CGSCC_OPM-NEXT:    [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32* noalias nocapture nofree noundef nonnull readnone align 4 dereferenceable(4) [[STACK]]) #[[ATTR16:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[RSPEC]]
 ;
-; IS__CGSCC_NPM: Function Attrs: nosync readnone
+; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test_speculatable_expr
 ; IS__CGSCC_NPM-SAME: () #[[ATTR9:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    [[STACK:%.*]] = alloca i32, align 4
@@ -1353,7 +1353,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
 ; IS__TUNIT_OPM: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind writeonly }
 ; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR7:[0-9]+]] = { readnone speculatable }
-; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nosync readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR8]] = { norecurse nosync readnone }
 ; IS__TUNIT_OPM: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR10:[0-9]+]] = { argmemonly nofree nounwind willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR11]] = { nofree nosync nounwind readnone willreturn }
@@ -1368,7 +1368,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
 ; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR6:[0-9]+]] = { readnone speculatable }
-; IS__TUNIT_NPM: attributes #[[ATTR7]] = { nosync readnone }
+; IS__TUNIT_NPM: attributes #[[ATTR7]] = { norecurse nosync readnone }
 ; IS__TUNIT_NPM: attributes #[[ATTR8]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR9:[0-9]+]] = { argmemonly nofree nounwind willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR10]] = { nofree nosync nounwind readnone willreturn }
@@ -1385,7 +1385,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
 ; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable }
-; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nosync readnone }
+; IS__CGSCC_OPM: attributes #[[ATTR9]] = { norecurse nosync readnone }
 ; IS__CGSCC_OPM: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR12]] = { willreturn }
@@ -1403,7 +1403,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
 ; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable }
-; IS__CGSCC_NPM: attributes #[[ATTR9]] = { nosync readnone }
+; IS__CGSCC_NPM: attributes #[[ATTR9]] = { norecurse nosync readnone }
 ; IS__CGSCC_NPM: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR12]] = { willreturn }

diff  --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 7de421f728f10..15502b34dea42 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -166,7 +166,8 @@ define void @pos_priv_mem() {
 ; CHECK-NEXT:    call void @aligned_barrier()
 ; CHECK-NEXT:    [[ARGC:%.*]] = addrspacecast i32 addrspace(5)* [[ARG]] to i32*
 ; CHECK-NEXT:    store i32 [[B]], i32* [[ARGC]], align 4
-; CHECK-NEXT:    store i32 [[A]], i32* @PG1, align 4
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[LOC]], align 4
+; CHECK-NEXT:    store i32 [[V]], i32* @PG1, align 4
 ; CHECK-NEXT:    ret void
 ;
   %arg = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5

diff  --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index fd039bf911317..c7ebc9550a9ab 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -1637,7 +1637,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU:       if.then:
 ; AMDGPU-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU:       if.end:
-; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
 ; AMDGPU-NEXT:    br label [[RETURN]]
@@ -2613,7 +2614,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX:       if.then:
 ; NVPTX-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX:       if.end:
-; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
 ; NVPTX-NEXT:    br label [[RETURN]]
@@ -3344,7 +3346,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED:       if.then:
 ; AMDGPU-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU-DISABLED:       if.end:
-; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    br label [[RETURN]]
@@ -4045,7 +4048,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED:       if.then:
 ; NVPTX-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX-DISABLED:       if.end:
-; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    br label [[RETURN]]

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index 587117be0613b..658b868982dc1 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -4928,21 +4928,17 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
-; CHECK1-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
 ; CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK1-NEXT:    store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK1-NEXT:    store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK1-NEXT:    store float* [[P]], float** [[GEP_P]], align 8
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4953,19 +4949,16 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
-; CHECK1-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
-; CHECK1-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
 ; CHECK1-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
 ; CHECK1-NEXT:    [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -4974,9 +4967,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -4993,7 +4986,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
+; CHECK1-NEXT:    [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
 ; CHECK1-NEXT:    store float [[ADD]], float* [[LOADGEP_P]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5212,24 +5206,20 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
-; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK1-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK1-NEXT:    store i32* [[B]], i32** [[GEP_B]], align 8
 ; CHECK1-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5242,19 +5232,16 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
-; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK1-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK1-NEXT:    [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -5263,9 +5250,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5282,8 +5269,9 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
 ; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5399,23 +5387,19 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
-; CHECK1-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK1-NEXT:    store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK1-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK1-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5426,19 +5410,16 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
-; CHECK1-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK1-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK1-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK1-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -5447,9 +5428,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5466,7 +5447,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
 ; CHECK1-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
 ; CHECK1-NEXT:    store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@@ -5586,25 +5568,21 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
-; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK1-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK1-NEXT:    store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
+; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
 ; CHECK1-NEXT:    store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5617,21 +5595,18 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
-; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK1-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK1-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
+; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
 ; CHECK1-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -5640,9 +5615,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5652,9 +5627,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK1-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
-; CHECK1-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK1-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK1:       omp_region.end4:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@@ -5671,8 +5646,9 @@ entry:
 ; CHECK1:       omp_region.body5:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK1:       seq.par.merged2:
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
+; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
 ; CHECK1-NEXT:    store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split.split:
@@ -5683,7 +5659,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
 ; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5800,7 +5777,8 @@ entry:
 ; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    ret void
@@ -6144,21 +6122,17 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
-; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
 ; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK2-NEXT:    store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK2-NEXT:    store float* [[P]], float** [[GEP_P]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6169,19 +6143,16 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
 ; CHECK2-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
 ; CHECK2-NEXT:    [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -6190,9 +6161,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6209,7 +6180,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
+; CHECK2-NEXT:    [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
 ; CHECK2-NEXT:    store float [[ADD]], float* [[LOADGEP_P]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -6428,24 +6400,20 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
-; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK2-NEXT:    store i32* [[B]], i32** [[GEP_B]], align 8
 ; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6458,19 +6426,16 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK2-NEXT:    [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -6479,9 +6444,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6498,8 +6463,9 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
 ; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -6615,23 +6581,19 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
-; CHECK2-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK2-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK2-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6642,19 +6604,16 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -6663,9 +6622,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6682,7 +6641,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
 ; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
 ; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@@ -6802,25 +6762,21 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
-; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
 ; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
 ; CHECK2-NEXT:    store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
 ; CHECK2-NEXT:    store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6833,21 +6789,18 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
 ; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
 ; CHECK2-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
 ; CHECK2-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
@@ -6856,9 +6809,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6868,9 +6821,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
-; CHECK2-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK2-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK2:       omp_region.end4:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@@ -6887,8 +6840,9 @@ entry:
 ; CHECK2:       omp_region.body5:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK2:       seq.par.merged2:
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
+; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
 ; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split:
@@ -6899,7 +6853,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
 ; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -7016,7 +6971,8 @@ entry:
 ; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void


        


More information about the llvm-commits mailing list